Added checkChiral for chiral systems (Reference/Cuda)

Added support for missing axis types (Cuda only). Removed rotationMatrix.

Added checkChiral for chiral systems (Reference/Cuda)
Added support for missing axis types (Cuda only). Removed rotationMatrix.
09fb3811 · Mark Friedrichs · 562cfb39 · 09fb3811 · 09fb3811 · 09fb3811
Commit 09fb3811 authored Nov 18, 2010 by Mark Friedrichs
10 changed files
--- a/plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
+++ b/plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
@@ -63,7 +63,7 @@ public:
        PME = 1 
    };  
-    enum MultipoleAxisTypes { ZThenX, Bisector };
+    enum MultipoleAxisTypes { ZThenX, Bisector, ZBisect, ThreeFold, ZOnly, LastAxisTypeIndex };
    // Algorithm used to converge mutual induced dipoles:
    //     SOR: successive-over-relaxation

--- a/plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
+++ b/plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
@@ -62,12 +62,17 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
        owner.getMultipoleParameters( ii, charge, molecularDipole, molecularQuadrupole, axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY,
                                      thole, dampingFactor, polarity );
-       // only 'Z-then-X' or 'Bisector' currently handled
+       // only 'Z-then-X', 'Bisector', 'Z-Bisect', 'ThreeFold' and 'Z-Only' currently handled
-        if( axisType != AmoebaMultipoleForce::ZThenX && axisType != AmoebaMultipoleForce::Bisector ){
+        if( axisType != AmoebaMultipoleForce::ZThenX  && axisType != AmoebaMultipoleForce::Bisector &&
+            axisType != AmoebaMultipoleForce::ZBisect && axisType != AmoebaMultipoleForce::ThreeFold &&
+            axisType != AmoebaMultipoleForce::ZOnly ) {
             std::stringstream buffer;
             buffer << "AmoebaMultipoleForce: axis type=" << axisType;
-             buffer << " not currently handled - only axisTypes[ " << AmoebaMultipoleForce::ZThenX << ", " << AmoebaMultipoleForce::Bisector << "] (ZThenX, Bisector) currently handled .";
+             buffer << " not currently handled - only axisTypes[ ";
+             buffer << AmoebaMultipoleForce::ZThenX   << ", " << AmoebaMultipoleForce::Bisector << ", ";
+             buffer << AmoebaMultipoleForce::ZBisect  << ", " << AmoebaMultipoleForce::ThreeFold;
+             buffer << "] (ZThenX, Bisector, Z-Bisect, ThreeFold) currently handled .";
             throw OpenMMException(buffer.str());
        }
    }

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
@@ -338,11 +338,20 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
    (void) fprintf( log, "     pAmoebaUreyBradleyID               %p\n",      amoebaGpu->amoebaSim.pAmoebaUreyBradleyID );
    (void) fprintf( log, "     pAmoebaUreyBradleyParameter        %p\n",      amoebaGpu->amoebaSim.pAmoebaUreyBradleyParameter );
-    if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
+//    if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
-    gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
+//    gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
+//    (void) fprintf( log, "     pRotationMatrix                    %p\n",      amoebaGpu->amoebaSim.pRotationMatrix);
    gpuPrintCudaStreamInt4( amoebaGpu->psMultipoleParticlesIdsAndAxisType, log );
+    (void) fprintf( log, "     pMultipoleParticlesIdsAndAxisType  %p\n",      amoebaGpu->amoebaSim.pMultipoleParticlesIdsAndAxisType);
+    gpuPrintCudaStreamInt( amoebaGpu->psMultipoleAxisOffset, log );
+    (void) fprintf( log, "     pMultipoleAxisOffset               %p\n",      amoebaGpu->amoebaSim.pMultipoleAxisOffset);
    gpuPrintCudaStreamFloat( amoebaGpu->psMolecularDipole, log );
+    (void) fprintf( log, "     pMolecularDipole                   %p\n",      amoebaGpu->amoebaSim.pMolecularDipole);
    gpuPrintCudaStreamFloat( amoebaGpu->psMolecularQuadrupole, log );
+    (void) fprintf( log, "     pMolecularQuadrupole               %p\n",      amoebaGpu->amoebaSim.pMolecularQuadrupole );
    gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameDipole, log );
    gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameQuadrupole, log );
@@ -1276,7 +1285,7 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
    // ---------------------------------------------------------------------------------------
-    if( amoebaGpu->psRotationMatrix != NULL ){
+    if( amoebaGpu->psMultipoleParticlesIdsAndAxisType != NULL ){
        return;
    }
@@ -1289,13 +1298,22 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
    // work space
-    amoebaGpu->psRotationMatrix                      = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "RotationMatrix");
+//    amoebaGpu->psRotationMatrix                            = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "RotationMatrix");
+//    amoebaGpu->amoebaSim.pRotationMatrix                   = amoebaGpu->psRotationMatrix->_pDevStream[0];
    // parameters
    amoebaGpu->psMultipoleParticlesIdsAndAxisType          = new CUDAStream<int4>(amoebaGpu->paddedNumberOfAtoms,    1, "MultipoleParticlesIdsAndAxisType");
+    amoebaGpu->amoebaSim.pMultipoleParticlesIdsAndAxisType = amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0];
+    amoebaGpu->psMultipoleAxisOffset                       = new CUDAStream<int>(amoebaGpu->paddedNumberOfAtoms,    1, "psMultipoleAxisOffset");
+    amoebaGpu->amoebaSim.pMultipoleAxisOffset              = amoebaGpu->psMultipoleAxisOffset->_pDevStream[0];
    amoebaGpu->psMolecularDipole                           = new CUDAStream<float>(3*amoebaGpu->paddedNumberOfAtoms, 1, "MolecularDipole");
+    amoebaGpu->amoebaSim.pMolecularDipole                  = amoebaGpu->psMolecularDipole->_pDevStream[0];
    amoebaGpu->psMolecularQuadrupole                       = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "MolecularQuadrupole");
+    amoebaGpu->amoebaSim.pMolecularQuadrupole              = amoebaGpu->psMolecularQuadrupole->_pDevStream[0];
    // output
@@ -1306,6 +1324,7 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
    amoebaGpu->amoebaSim.pLabFrameQuadrupole               = amoebaGpu->psLabFrameQuadrupole->_pDevStream[0];
    memset( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0], 0, sizeof(int)*4*amoebaGpu->paddedNumberOfAtoms );
+    memset( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0],              0, sizeof(int)*amoebaGpu->paddedNumberOfAtoms );
 }
 static void gpuFixedEFieldAllocate( amoebaGpuContext amoebaGpu )
@@ -1597,7 +1616,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
    std::vector<int> maxIndices;
    for( unsigned int ii = 0; ii < charges.size(); ii++ ){
        maxIndices.push_back(ii);
-        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z   = ii;
+        amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii]   = ii;
    }
    if( nonbondedMethod == 0 ){
@@ -1623,37 +1642,64 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
        amoebaGpu->amoebaSim.dielec               = 1.0f;
    }
+    static const int maxAxisType = 5;
+    int axisTypeCount[maxAxisType+1] = { 0, 0, 0, 0, 0, 0 };
    for( int ii = 0; ii < static_cast<int>(charges.size()); ii++ ){
        // axis type & multipole particles ids
-        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x       = multipoleParticleZ[ii];
+        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x       = multipoleParticleX[ii];
-        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y       = multipoleParticleX[ii];
+        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y       = multipoleParticleY[ii];
+        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z       = multipoleParticleZ[ii];
        amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w       = axisType[ii];
+        if( axisType[ii] < (maxAxisType) && axisType[ii] > -1 ){
+            axisTypeCount[axisType[ii]]++;
+        } else {
+            axisTypeCount[maxAxisType]++;
+        }
+        // for z-only need to add access to random numbers
+        // and need test system
+        if( axisType[ii] == 4 ){
+            //fprintf( stderr, "Axis type z-only (atom=%d) not fully implemented -- aborting.\n", ii );
+            fprintf( stderr, "Warning: Axis type z-only (atom=%d) not fully implemented.\n", ii );
+//            exit(0);
+        }
        int axisParticleIndex                                                     = multipoleParticleZ[ii];
        if( maxIndices[axisParticleIndex] < ii ){
            maxIndices[axisParticleIndex] = ii;
        }
-        if( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z > ii ){
+        if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
-            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z = ii;
+            amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
        }
        axisParticleIndex                                                         = multipoleParticleX[ii];
        if( maxIndices[axisParticleIndex] < ii ){
            maxIndices[axisParticleIndex] = ii;
        }
-        if( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z > ii ){
+        if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
-            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z = ii;
+            amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
+        }
+        axisParticleIndex                                                         = multipoleParticleY[ii];
+        if( axisParticleIndex > -1 ){
+            if( maxIndices[axisParticleIndex] < ii ){
+                maxIndices[axisParticleIndex] = ii;
+            }
+            if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
+                amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
+            }
        }
        if( 0 && amoebaGpu->log )
            fprintf( amoebaGpu->log, "Z1 %4d [%4d %4d] %4d %4d %4d %4d   %d %d\n", ii,
-                     multipoleParticleZ[ii], multipoleParticleX[ii],
+                     multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii],
-                     amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][multipoleParticleZ[ii]].z,
+                     amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][multipoleParticleZ[ii]],
                     maxIndices[multipoleParticleZ[ii]],
-                     amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][multipoleParticleX[ii]].z,
+                     amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][multipoleParticleX[ii]],
                     maxIndices[multipoleParticleX[ii]],
-                     amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][0].z, maxIndices[0] );
+                     amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][0], maxIndices[0] );
        // charges
@@ -1733,17 +1779,16 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
        }
 #ifdef AMOEBA_DEBUG
-        //if( amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (charges.size() - maxPrint) ) ) || ( ii == targetAtoms[0] || ii == targetAtoms[1]) ) ){
        if( (amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (charges.size() - maxPrint) )) ) ) ){
            // axis particles
-            (void) fprintf( amoebaGpu->log,"%u axis particles [%6d %6d %6d diff=%d %d] ", ii,
+            (void) fprintf( amoebaGpu->log,"%u axis particles [%6d %6d %6d] axis=%d max=%d diff=%d ", ii,
                            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x,
                            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y,
                            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
-                            maxIndices[ii], maxIndices[ii] - amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
+                            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w,
-                            amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w );
+                            maxIndices[ii], maxIndices[ii] - amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii] );
            // dipole
@@ -1886,6 +1931,13 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
    }
+    if( amoebaGpu->log ){
+        std::string axisLabel[maxAxisType+1] = {  "ZThenX", "Bisector", "ZBisect", "ThreeFold", "ZOnly", "Unknown"};
+        for( unsigned int kk = 0; kk < (maxAxisType+1); kk++ ){
+            (void) fprintf( amoebaGpu->log, "%2u %10s atom count=%d\n", kk, axisLabel[kk].c_str(), axisTypeCount[kk] );
+        }
+    }
 #if 0
    if( amoebaGpu->log ){
        FILE* filePtr = fopen( "oldScale.txt", "w" );
@@ -1916,7 +1968,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
        // axis type & multipole particles ids
-        int diff = maxIndices[ii] - amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z;
+        int diff = maxIndices[ii] - amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii];
        if( diff > amoebaGpu->maxMapTorqueDifference ){
            amoebaGpu->maxMapTorqueDifference = diff;
        }
@@ -1944,6 +1996,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
    amoebaGpu->amoebaSim.paddedNumberOfAtoms   = amoebaGpu->paddedNumberOfAtoms;
    amoebaGpu->psMultipoleParticlesIdsAndAxisType->Upload();
+    amoebaGpu->psMultipoleAxisOffset->Upload();
    amoebaGpu->psMolecularDipole->Upload();
    amoebaGpu->psMolecularQuadrupole->Upload();
    amoebaGpu->psCovalentDegree->Upload();
@@ -2707,8 +2760,9 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
    // molecular frame multipoles
-    delete gpu->psRotationMatrix;
+    //delete gpu->psRotationMatrix;
    delete gpu->psMultipoleParticlesIdsAndAxisType;
+    delete gpu->psMultipoleAxisOffset;
    delete gpu->psMolecularDipole;
    delete gpu->psMolecularQuadrupole;
    delete gpu->psLabFrameDipole;

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
@@ -139,6 +139,11 @@ struct cudaAmoebaGmxSimulation {
    float scalingDistanceCutoff;                    // scaling cutoff
    float2*         pDampingFactorAndThole;         // Thole & damping factors
+    float* pRotationMatrix; 
+    int4*  pMultipoleParticlesIdsAndAxisType; 
+    int*   pMultipoleAxisOffset; 
+    float* pMolecularDipole; 
+    float* pMolecularQuadrupole; 
    float* pLabFrameDipole;
    float* pLabFrameQuadrupole;
    float* pInducedDipole;

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
@@ -129,6 +129,7 @@ struct _amoebaGpuContext {
    // multipole parameters
    CUDAStream<int4>* psMultipoleParticlesIdsAndAxisType;
+    CUDAStream<int>* psMultipoleAxisOffset;
    CUDAStream<float>* psMolecularDipole;
    CUDAStream<float>* psMolecularQuadrupole;

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
@@ -52,6 +52,97 @@ __device__ static float normVector3( float* vector )
 #undef AMOEBA_DEBUG
+// Axis-type codes stored in the .w component of pMultipoleParticlesIdsAndAxisType:
+// ZThenX    == 0
+// Bisector  == 1
+// ZBisect   == 2
+// ThreeFold == 3
+// ZOnly     == 4
+//
+// kCudaComputeCheckChiral_kernel
+//
+// Copies the molecular-frame dipole (3 floats) and quadrupole (9 floats) of one
+// atom into the lab-frame arrays. For sites whose axis type is not ZThenX and
+// whose three axis-atom indices are all valid, the signed volume spanned by the
+// site and its z/x axis atoms relative to the y axis atom is evaluated; when it
+// is negative, dipole element [1] and quadrupole elements [1], [3], [5], [7]
+// are stored with inverted sign.
+//
+// Launch layout: one block per atom (atomIndex = blockIdx.x). Every thread of
+// the block computes and stores the same values; each output element is written
+// exactly once per thread from the read-only molecular-frame value, so the
+// redundant stores are idempotent (no read-modify-write on global memory).
+//
+// NOTE(review): TINKER's chkpole appears to apply this inversion only to
+// Z-then-X sites that have a y-axis atom, whereas this kernel skips Z-then-X
+// sites -- confirm the intended condition against the reference implementation.
+__global__
+#if (__CUDA_ARCH__ >= 200)
+__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
+#elif (__CUDA_ARCH__ >= 130)
+__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
+#endif
+void kCudaComputeCheckChiral_kernel( void )
+{
+    const float4* atomCoord      = cSim.pPosq;
+    const int4* multiPoleAtoms   = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
+    // ---------------------------------------------------------------------------------------
+    const int atomIndex          = blockIdx.x;
+    const int4 axisInfo          = multiPoleAtoms[atomIndex];
+    const float* molDipole       = &(cAmoebaSim.pMolecularDipole[atomIndex*3]);
+    const float* molQuadrupole   = &(cAmoebaSim.pMolecularQuadrupole[atomIndex*9]);
+    float* labDipole             = &(cAmoebaSim.pLabFrameDipole[atomIndex*3]);
+    float* labQuadrupole         = &(cAmoebaSim.pLabFrameQuadrupole[atomIndex*9]);
+    // ---------------------------------------------------------------------------------------
+    // decide whether the site is inverted; z-then-x sites are skipped, and so are
+    // sites with an undefined (negative) axis atom, which would otherwise index
+    // atomCoord out of bounds
+    bool invert                  = false;
+    const int atomB              = axisInfo.z;
+    const int atomC              = axisInfo.x;
+    const int atomD              = axisInfo.y;
+    if( axisInfo.w != 0 && atomB >= 0 && atomC >= 0 && atomD >= 0 ){
+        const float4 posA        = atomCoord[atomIndex];
+        const float4 posB        = atomCoord[atomB];
+        const float4 posC        = atomCoord[atomC];
+        const float4 posD        = atomCoord[atomD];
+        // displacements of A, B, C from D
+        const float ad0          = posA.x - posD.x;
+        const float ad1          = posA.y - posD.y;
+        const float ad2          = posA.z - posD.z;
+        const float bd0          = posB.x - posD.x;
+        const float bd1          = posB.y - posD.y;
+        const float bd2          = posB.z - posD.z;
+        const float cd0          = posC.x - posD.x;
+        const float cd1          = posC.y - posD.y;
+        const float cd2          = posC.z - posD.z;
+        // signed volume (determinant of the three displacement vectors)
+        const float c0           = bd1*cd2 - bd2*cd1;
+        const float c1           = cd1*ad2 - cd2*ad1;
+        const float c2           = ad1*bd2 - ad2*bd1;
+        const float volume       = c0*ad0 + c1*bd0 + c2*cd0;
+        invert                   = ( volume < 0.0f );
+    }
+    // ---------------------------------------------------------------------------------------
+    // dipole: element [1] is pole(3,i) in the Tinker numbering
+    labDipole[0]                 = molDipole[0];
+    labDipole[1]                 = invert ? -molDipole[1] : molDipole[1];
+    labDipole[2]                 = molDipole[2];
+    // quadrupole: the odd elements [1], [3], [5], [7] are
+    // pole(6,i), pole(8,i), pole(10,i), pole(12,i) in the Tinker numbering
+    for( int ii = 0; ii < 9; ii++ ){
+        const float value        = molQuadrupole[ii];
+        labQuadrupole[ii]        = ( invert && (ii & 1) ) ? -value : value;
+    }
+}
 __global__
 #if (__CUDA_ARCH__ >= 200)
 __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
@@ -60,22 +151,23 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
 #else
 __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
 #endif
-void kCudaComputeLabFrameMoments_kernel(
+void kCudaComputeLabFrameMoments_kernel( void )
-				   int numOfAtoms,
-				   float *rotationMatrix,
-				   float4 *atomCoord,
-				   int4 *multiPoleAtoms,
-				   float *molecularDipole, float *molecularQuadrupole,
-				   float *labFrameDipole,  float *labFrameQuadrupole )
 {
-   float* vectorX;
+    float vectorX[3];
-   float* vectorY;
+    float vectorY[3];
-   float* vectorZ;
+    float vectorZ[3];
+    int numOfAtoms               = cSim.atoms;
+    //float* rotationMatrix        = cAmoebaSim.pRotationMatrix;
+    float4* atomCoord            = cSim.pPosq;
+    int4* multiPoleAtoms         = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
+    float* labFrameDipole        = cAmoebaSim.pLabFrameDipole;
+    float* labFrameQuadrupole    = cAmoebaSim.pLabFrameQuadrupole;
    // ---------------------------------------------------------------------------------------
-   int atomIndex = blockIdx.x;//__mul24(blockIdx.x,blockDim.x) + threadIdx.x ;
+    int atomIndex = blockIdx.x;
    // ---------------------------------------------------------------------------------------
@@ -87,20 +179,22 @@ void kCudaComputeLabFrameMoments_kernel(
    // code common to ZThenX and Bisector
+ /*
    vectorX                          = &(rotationMatrix[atomIndex*9]);
    vectorY                          = &(rotationMatrix[atomIndex*9+ 3]);
    vectorZ                          = &(rotationMatrix[atomIndex*9+ 6]);
+ */
    float4 coordinatesThisAtom       = atomCoord[atomIndex];
-   int multipoleAtomIndex           = multiPoleAtoms[atomIndex].x;
+    int multipoleAtomIndex           = multiPoleAtoms[atomIndex].z;
    float4 coordinatesAxisAtom       = atomCoord[multipoleAtomIndex];
    vectorZ[0]                       = coordinatesAxisAtom.x - coordinatesThisAtom.x;
    vectorZ[1]                       = coordinatesAxisAtom.y - coordinatesThisAtom.y;
    vectorZ[2]                       = coordinatesAxisAtom.z - coordinatesThisAtom.z;
-   multipoleAtomIndex               = multiPoleAtoms[atomIndex].y; 
+    multipoleAtomIndex               = multiPoleAtoms[atomIndex].x; 
    coordinatesAxisAtom              = atomCoord[multipoleAtomIndex];
    vectorX[0]                       = coordinatesAxisAtom.x - coordinatesThisAtom.x;
@@ -109,14 +203,55 @@ void kCudaComputeLabFrameMoments_kernel(
    int axisType                     = multiPoleAtoms[atomIndex].w; 
-   float sum                        = normVector3( vectorZ );
+    /*
+        z-only
+           (1) norm z
+           (2) select random x
+           (3) x = x - (x.z)z
+           (4) norm x
+        z-then-x
+           (1) norm z
+           (2) norm x (not needed)
+           (3) x = x - (x.z)z
+           (4) norm x
+        bisector
+           (1) norm z
+           (2) norm x 
+           (3) z = x + z
+           (4) norm z
+           (5) x = x - (x.z)z 
+           (6) norm x 
+        z-bisect
+           (1) norm z
+           (2) norm x 
+           (3) norm y 
+           (3) x = x + y
+           (4) norm x
+           (5) x = x - (x.z)z 
+           (6) norm x 
+        3-fold
+           (1) norm z
+           (2) norm x 
+           (3) norm y 
+           (4) z = x + y + z
+           (5) norm z
+           (6) x = x - (x.z)z 
+           (7) norm x 
+    */
    // branch based on axis type
+    float sum                        = normVector3( vectorZ );
    if( axisType == 1 ){
        // bisector
-     // dx = dx1 + dx2 (in Tinker code)
        sum                     = normVector3( vectorX );
@@ -126,8 +261,45 @@ void kCudaComputeLabFrameMoments_kernel(
        sum                     = normVector3( vectorZ );
+    } else if( axisType == 2 || axisType == 3 ){ 
+        // z-bisect
+        multipoleAtomIndex      = multiPoleAtoms[atomIndex].y; 
+        coordinatesAxisAtom     = atomCoord[multipoleAtomIndex];
+        vectorY[0]              = coordinatesAxisAtom.x - coordinatesThisAtom.x;
+        vectorY[1]              = coordinatesAxisAtom.y - coordinatesThisAtom.y;
+        vectorY[2]              = coordinatesAxisAtom.z - coordinatesThisAtom.z;
+        sum                     = normVector3( vectorY );
+        sum                     = normVector3( vectorX );
+        if( axisType == 2 ){
+            vectorX[0]         += vectorY[0];
+            vectorX[1]         += vectorY[1];
+            vectorX[2]         += vectorY[2];
+            sum                 = normVector3( vectorX );
+        } else { 
+            // 3-fold
+            vectorZ[0]         += vectorX[0] + vectorY[0];
+            vectorZ[1]         += vectorX[1] + vectorY[1];
+            vectorZ[2]         += vectorX[2] + vectorY[2];
+            sum                 = normVector3( vectorZ );
+        }
+    } else if( axisType == 4 ){ 
+        vectorX[0]             = 0.1f;
+        vectorX[1]             = 0.1f;
+        vectorX[2]             = 0.1f;
    }
+    // x = x - (x.z)z
    float dot         = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
    vectorX[0]       -= dot*vectorZ[0];
@@ -140,8 +312,28 @@ void kCudaComputeLabFrameMoments_kernel(
    vectorY[1]        = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
    vectorY[2]        = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
-   float* molDipole  = &(molecularDipole[atomIndex*3]);
+    // use identity rotation matrix for unrecognized axis types
+    if( axisType < 0 || axisType > 4 ){
+        vectorX[0] = 1.0f;
+        vectorX[1] = 0.0f;
+        vectorX[2] = 0.0f;
+        vectorY[0] = 0.0f;
+        vectorY[1] = 1.0f;
+        vectorY[2] = 0.0f;
+        vectorZ[0] = 0.0f;
+        vectorZ[1] = 0.0f;
+        vectorZ[2] = 1.0f;
+    }
+    float molDipole[3];
    float* labDipole  = &(labFrameDipole[atomIndex*3]);
+    molDipole[0]      = labDipole[0];
+    molDipole[1]      = labDipole[1];
+    molDipole[2]      = labDipole[2];
    // set out-of-range elements to 0.0f
@@ -151,16 +343,20 @@ void kCudaComputeLabFrameMoments_kernel(
    // ---------------------------------------------------------------------------------------
-   const float * mPole[3];
    float* rPole[3];
+    float mPole[3][3];
-   float* molQuadrupole       = &(molecularQuadrupole[atomIndex*9]);
    float* labQuadrupole       = &(labFrameQuadrupole[atomIndex*9]);
    for( int ii = 0; ii < 3; ii++ ){
-      mPole[ii]    = molQuadrupole + ii*3;
+        mPole[ii][0]   = labQuadrupole[3*ii+0];
+        mPole[ii][1]   = labQuadrupole[3*ii+1];
+        mPole[ii][2]   = labQuadrupole[3*ii+2];
        rPole[ii]      = labQuadrupole + ii*3;
-      rPole[ii][0] = rPole[ii][1] = rPole[ii][2] = 0.0f;
+        rPole[ii][0]   = 0.0f;
+        rPole[ii][1]   = 0.0f;
+        rPole[ii][2]   = 0.0f;
    }
    int ii = threadIdx.x;
@@ -240,16 +436,10 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
    double kernelTime = 0.0;
 #endif
-    kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> (
+    kCudaComputeCheckChiral_kernel<<< numBlocks, numThreads>>> ( );
-       gpu->natoms,
+    LAUNCHERROR("kCudaComputeCheckChiral");
-       amoebaGpu->psRotationMatrix->_pDevStream[0],
-       gpu->psPosq4->_pDevStream[0],
+    kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> ( );
-       amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0],
-       amoebaGpu->psMolecularDipole->_pDevStream[0],
-       amoebaGpu->psMolecularQuadrupole->_pDevStream[0],
-       amoebaGpu->psLabFrameDipole->_pDevStream[0],
-       amoebaGpu->psLabFrameQuadrupole->_pDevStream[0] 
-       );
    LAUNCHERROR(methodName);
 #ifdef AMOEBA_DEBUG

--- a/plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.cpp
+++ b/plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.cpp
--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
@@ -382,6 +382,46 @@ void AmoebaReferenceMultipoleForce::loadParticleData( RealOpenMM** particlePosit
    }
 }
+/**---------------------------------------------------------------------------------------
+
+   Invert multipole components of particleI when its local frame is inverted
+
+   Nothing is done for ZThenX sites. For every other axis type the signed volume built
+   from the three getDelta() vectors (particleY paired with particleI, particleZ and
+   particleX) is evaluated; if it is negative, dipole[1] and the QXY and QYZ quadrupole
+   entries of particleI are negated in place.
+
+   NOTE(review): TINKER's chkpole appears to apply this inversion only to Z-then-X sites
+   that have a y-axis atom, whereas this routine skips ZThenX -- confirm the intended
+   condition.
+
+   @param  particleI            particle whose dipole/quadrupole may be modified
+   @param  axisType             axis type of particleI
+   @param  particleZ            z-axis particle of particleI
+   @param  particleX            x-axis particle of particleI
+   @param  particleY            y-axis particle of particleI
+
+   --------------------------------------------------------------------------------------- */
+void AmoebaReferenceMultipoleForce::checkChiral( MultipoleParticleData& particleI, int axisType,
+                                                 MultipoleParticleData& particleZ, MultipoleParticleData& particleX, 
+                                                 MultipoleParticleData& particleY ) const {
+    // ---------------------------------------------------------------------------------------
+    // row indices into delta[][]
+    static const int AD                 = 0;
+    static const int BD                 = 1;
+    static const int CD                 = 2;
+    static const int C                  = 3;
+    double delta[4][3];
+    // ---------------------------------------------------------------------------------------
+    if( axisType == AmoebaMultipoleForce::ZThenX ){
+        return;
+    }
+    getDelta( particleY, particleI, delta[AD] );
+    getDelta( particleY, particleZ, delta[BD] );
+    getDelta( particleY, particleX, delta[CD] );
+    // signed volume (determinant of the three delta vectors)
+    delta[C][0]       = delta[BD][1]*delta[CD][2] - delta[BD][2]*delta[CD][1];
+    delta[C][1]       = delta[CD][1]*delta[AD][2] - delta[CD][2]*delta[AD][1];
+    delta[C][2]       = delta[AD][1]*delta[BD][2] - delta[AD][2]*delta[BD][1];
+    RealOpenMM volume = delta[C][0]*delta[AD][0] + delta[C][1]*delta[BD][0] + delta[C][2]*delta[CD][0];
+    if( volume < 0.0 ){
+        particleI.dipole[1]         = -particleI.dipole[1];       // pole(3,i)
+        particleI.quadrupole[QXY]   = -particleI.quadrupole[QXY]; // pole(6,i)  && pole(8,i)
+        particleI.quadrupole[QYZ]   = -particleI.quadrupole[QYZ]; // pole(10,i) && pole(12,i)
+    }
+    return;
+}
 void AmoebaReferenceMultipoleForce::applyRotationMatrix(       MultipoleParticleData& particleI,
                                                         const MultipoleParticleData& particleZ,
                                                         const MultipoleParticleData& particleX, int axisType ) const {
@@ -1460,7 +1500,7 @@ RealOpenMM AmoebaReferenceMultipoleForce::calculateNoCutoffElectrostatic( std::v
    // ---------------------------------------------------------------------------------------
-    // initialize forces/energy and scaleing factors
+    // initialize forces/energy and scaling factors
    std::vector<Vec3> torques( particleData.size() );
    for( unsigned int ii = 0; ii <  particleData.size(); ii++ ){
@@ -1580,6 +1620,16 @@ RealOpenMM AmoebaReferenceMultipoleForce::calculateNoCutoffForceAndEnergy( unsig
    loadParticleData( particlePositions, charges, dipoles, quadrupoles,
                      tholes, dampingFactors, polarity, particleData );
+    // check for chiral centers that need multipoles inverted
+    for( unsigned int ii = 0; ii < numParticles; ii++ ){
+        if( multipoleAtomYs[ii] ){
+            checkChiral( particleData[ii], axisTypes[ii], particleData[multipoleAtomZs[ii]], particleData[multipoleAtomXs[ii]], particleData[multipoleAtomYs[ii]] );
+        }
+    }
+    // apply rotation matrix
    for( unsigned int ii = 0; ii < numParticles; ii++ ){
        if( multipoleAtomZs[ii] >= 0 && multipoleAtomXs[ii] >= 0 ){
            applyRotationMatrix( particleData[ii], particleData[multipoleAtomZs[ii]], particleData[multipoleAtomXs[ii]], axisTypes[ii] );

--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
@@ -407,6 +407,24 @@ private:
    void getAndScaleInverseRs( RealOpenMM dampI, RealOpenMM dampJ, RealOpenMM tholeI, RealOpenMM tholeJ,
                               RealOpenMM r, std::vector<RealOpenMM>& rrI ) const;
+    /**---------------------------------------------------------------------------------------
+       Check multipoles at chiral sites
+       inverts atomic multipole moments as necessary
+       at sites with chiral local reference frame definitions
+       @param  particleI            particleI data 
+       @param  axisType             axis type
+       @param  particleZ            z-axis particle to particleI
+       @param  particleX            x-axis particle to particleI
+       @param  particleY            y-axis particle to particleI
+       --------------------------------------------------------------------------------------- */
+    void checkChiral( MultipoleParticleData& particleI, int axisType, MultipoleParticleData& particleZ,
+                      MultipoleParticleData& particleX, MultipoleParticleData& particleY ) const;
    /**---------------------------------------------------------------------------------------
       Apply rotation matrix to molecular dipole/quadrupoles to get corresponding lab frame values