Commit 41abd9fb authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Fix for isolated ions

parent 761d7e17
......@@ -93,7 +93,7 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if( mapIterator == contextToAmoebaDataMap.end() ){
amoebaCudaData = new AmoebaCudaData( cudaPlatformData );
contextToAmoebaDataMap[&context] = amoebaCudaData;
amoebaCudaData->setLog( stderr );
//amoebaCudaData->setLog( stderr );
amoebaCudaData->setContextImpl( static_cast<void*>(&context) );
} else {
amoebaCudaData = mapIterator->second;
......
......@@ -86,13 +86,14 @@ amoebaGpuContext amoebaGpuInit( _gpuContext* gpu )
extern "C"
void gpuPrintCudaStream( std::string name,
unsigned int length, unsigned int subStreams, unsigned int stride,
unsigned int memoryFootprint,
void* pSysStream, void* pDevStream,
void* pSysData, void* pDevData, FILE* log)
{
(void) fprintf( log, " %-35s [%8u %5u %8u] Stream[%p %p] Data[%16p %16p]\n",
(void) fprintf( log, " %-35s [%8u %5u %8u %8u] Stream[%p %p] Data[%16p %16p]\n",
name.c_str(), length, subStreams,
stride, pSysStream, pDevStream, pSysData, pDevData );
stride, memoryFootprint, pSysStream, pDevStream, pSysData, pDevData );
}
extern "C"
......@@ -102,6 +103,7 @@ void gpuPrintCudaStreamFloat( CUDAStream<float>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -113,6 +115,7 @@ void gpuPrintCudaStreamFloat2( CUDAStream<float2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -124,6 +127,7 @@ void gpuPrintCudaStreamFloat4( CUDAStream<float4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -135,6 +139,7 @@ void gpuPrintCudaStreamUnsignedInt( CUDAStream<unsigned int>* cUDAStream, FILE*
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( unsigned int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -146,6 +151,7 @@ void gpuPrintCudaStreamInt( CUDAStream<int>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -157,6 +163,7 @@ void gpuPrintCudaStreamInt2( CUDAStream<int2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -168,6 +175,7 @@ void gpuPrintCudaStreamInt4( CUDAStream<int4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -1277,15 +1285,6 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s: paddedNumberOfAtoms=%d\n",
methodName.c_str(), paddedNumberOfAtoms ); (void) fflush( amoebaGpu->log );
}
#endif
// work space
// parameters
amoebaGpu->psMultipoleParticlesIdsAndAxisType = new CUDAStream<int4>(paddedNumberOfAtoms, 1, "MultipoleParticlesIdsAndAxisType");
......@@ -1296,9 +1295,11 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->psMolecularDipole = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "MolecularDipole");
amoebaGpu->amoebaSim.pMolecularDipole = amoebaGpu->psMolecularDipole->_pDevData;
memset( amoebaGpu->psMolecularDipole->_pSysData, 0, sizeof(float)*3*paddedNumberOfAtoms );
amoebaGpu->psMolecularQuadrupole = new CUDAStream<float>(9*paddedNumberOfAtoms, 1, "MolecularQuadrupole");
amoebaGpu->amoebaSim.pMolecularQuadrupole = amoebaGpu->psMolecularQuadrupole->_pDevData;
memset( amoebaGpu->psMolecularQuadrupole->_pSysData, 0, sizeof(float)*9*paddedNumberOfAtoms );
// output
......
......@@ -47,38 +47,10 @@ typedef MapIntFloat::const_iterator MapIntFloatCI;
struct _amoebaGpuContext {
_gpuContext* gpuContext;
cudaAmoebaGmxSimulation amoebaSim;
FILE* log;
//bool bOutputBufferPerWarp;
//unsigned int paddedNumberOfAtoms;
//unsigned int nonbondBlocks;
//unsigned int nonbondThreadsPerBlock;
//unsigned int nonbondOutputBuffers;
//unsigned int threadsPerBlock;
//unsigned int fieldReduceThreadsPerBlock;
//unsigned int outputBuffers;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
cudaAmoebaGmxSimulation amoebaSim;
int maxCovalentDegreeSz;
CUDAStream<int4>* psAmoebaBondID;
CUDAStream<float2>* psAmoebaBondParameter;
......@@ -116,6 +88,25 @@ struct _amoebaGpuContext {
CUDAStream<int4>* psAmoebaTorsionTorsionID3;
CUDAStream<float4>* psAmoebaTorsionTorsionGrids;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
int maxCovalentDegreeSz;
float solventDielectric;
// multipole parameters
......@@ -126,7 +117,6 @@ struct _amoebaGpuContext {
// buffer indices used for mapping torques onto forces
int maxTorqueBufferIndex;
int useNewTorqueMapScheme;
int torqueMapForce4Delete;
CUDAStream<int4>* psMultipoleParticlesTorqueBufferIndices;
CUDAStream<float4>* psTorqueMapForce4;
......
......@@ -6,7 +6,8 @@
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
......@@ -480,7 +481,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
// write results to file
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......
......@@ -1730,7 +1730,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
}
LAUNCHERROR("kReduceToBornForcePrefactor");
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
......@@ -1748,7 +1748,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
}
(void) fflush( amoebaGpu->log );
*/
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......@@ -1943,7 +1943,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque );
if( 1 ){
if( 0 ){
std::vector<int> fileId;
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
......@@ -1956,7 +1956,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
if( 1 ){
if( 0 ){
std::vector<int> fileId;
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
......
......@@ -33,8 +33,8 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
RTERROR(status, "GetCalculateAmoebaCudaMutualInducedAndGkFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
#define AMOEBA_DEBUG
//#undef AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
......@@ -923,7 +923,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
}
#ifdef AMOEBA_DEBUG
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......
......@@ -90,8 +90,8 @@ void kCudaComputeCheckChiral_kernel( void )
{
// skip z-then-x
int axisType = multiPoleParticles[particleIndex].w;
if( axisType != 0 && multiPoleParticles[particleIndex].y >= 0 )
int axisType = multiPoleParticles[particleIndex].w;
if( axisType != 0 && multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].y >=0 && multiPoleParticles[particleIndex].z >= 0 )
{
// ---------------------------------------------------------------------------------------
......@@ -164,204 +164,213 @@ void kCudaComputeLabFrameMoments_kernel( void )
while( particleIndex < numberOfParticles )
{
float4 coordinatesThisParticle = particleCoord[particleIndex];
int multipoleParticleIndex = multiPoleParticles[particleIndex].z;
float4 coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorZ[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorZ[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorZ[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
multipoleParticleIndex = multiPoleParticles[particleIndex].x;
coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorX[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorX[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorX[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
int axisType = multiPoleParticles[particleIndex].w;
/*
z-only
(1) norm z
(2) select random x
(3) x = x - (x.z)z
(4) norm x
z-then-x
(1) norm z
(2) norm x (not needed)
(3) x = x - (x.z)z
(4) norm x
bisector
(1) norm z
(2) norm x
(3) z = x + z
(4) norm z
(5) x = x - (x.z)z
(6) norm x
z-bisect
(1) norm z
(2) norm x
(3) norm y
(3) x = x + y
(4) norm x
(5) x = x - (x.z)z
(6) norm x
3-fold
(1) norm z
(2) norm x
(3) norm y
(4) z = x + y + z
(5) norm z
(6) x = x - (x.z)z
(7) norm x
*/
// branch based on axis type
if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
{
float4 coordinatesThisParticle = particleCoord[particleIndex];
float sum = normVector3( vectorZ );
if( axisType == 1 ){
int multipoleParticleIndex = multiPoleParticles[particleIndex].z;
float4 coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorZ[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorZ[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorZ[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
multipoleParticleIndex = multiPoleParticles[particleIndex].x;
coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorX[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorX[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorX[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
int axisType = multiPoleParticles[particleIndex].w;
// bisector
/*
z-only
(1) norm z
(2) select random x
(3) x = x - (x.z)z
(4) norm x
z-then-x
(1) norm z
(2) norm x (not needed)
(3) x = x - (x.z)z
(4) norm x
bisector
(1) norm z
(2) norm x
(3) z = x + z
(4) norm z
(5) x = x - (x.z)z
(6) norm x
z-bisect
(1) norm z
(2) norm x
(3) norm y
(3) x = x + y
(4) norm x
(5) x = x - (x.z)z
(6) norm x
3-fold
(1) norm z
(2) norm x
(3) norm y
(4) z = x + y + z
(5) norm z
(6) x = x - (x.z)z
(7) norm x
*/
// branch based on axis type
float sum = normVector3( vectorZ );
if( axisType == 1 ){
// bisector
sum = normVector3( vectorX );
vectorZ[0] += vectorX[0];
vectorZ[1] += vectorX[1];
vectorZ[2] += vectorX[2];
sum = normVector3( vectorZ );
} else if( axisType == 2 || axisType == 3 ){
// z-bisect
multipoleParticleIndex = multiPoleParticles[particleIndex].y;
if( multipoleParticleIndex >= 0 && multipoleParticleIndex < cSim.atoms ){
coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorY[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorY[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorY[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
sum = normVector3( vectorX );
sum = normVector3( vectorY );
sum = normVector3( vectorX );
vectorZ[0] += vectorX[0];
vectorZ[1] += vectorX[1];
vectorZ[2] += vectorX[2];
sum = normVector3( vectorZ );
} else if( axisType == 2 || axisType == 3 ){
// z-bisect
multipoleParticleIndex = multiPoleParticles[particleIndex].y;
coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorY[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorY[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
vectorY[2] = coordinatesAxisParticle.z - coordinatesThisParticle.z;
sum = normVector3( vectorY );
sum = normVector3( vectorX );
if( axisType == 2 ){
vectorX[0] += vectorY[0];
vectorX[1] += vectorY[1];
vectorX[2] += vectorY[2];
sum = normVector3( vectorX );
} else {
// 3-fold
if( axisType == 2 ){
vectorX[0] += vectorY[0];
vectorX[1] += vectorY[1];
vectorX[2] += vectorY[2];
sum = normVector3( vectorX );
} else {
// 3-fold
vectorZ[0] += vectorX[0] + vectorY[0];
vectorZ[1] += vectorX[1] + vectorY[1];
vectorZ[2] += vectorX[2] + vectorY[2];
sum = normVector3( vectorZ );
}
}
} else if( axisType >= 4 ){
vectorZ[0] += vectorX[0] + vectorY[0];
vectorZ[1] += vectorX[1] + vectorY[1];
vectorZ[2] += vectorX[2] + vectorY[2];
sum = normVector3( vectorZ );
vectorX[0] = 0.1f;
vectorX[1] = 0.1f;
vectorX[2] = 0.1f;
}
} else if( axisType >= 4 ){
vectorX[0] = 0.1f;
vectorX[1] = 0.1f;
vectorX[2] = 0.1f;
}
// x = x - (x.z)z
float dot = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
vectorX[0] -= dot*vectorZ[0];
vectorX[1] -= dot*vectorZ[1];
vectorX[2] -= dot*vectorZ[2];
sum = normVector3( vectorX );
vectorY[0] = (vectorZ[1]*vectorX[2]) - (vectorZ[2]*vectorX[1]);
vectorY[1] = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
vectorY[2] = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
// use identity rotation matrix for unrecognized axis types
if( axisType < 0 || axisType > 4 ){
// x = x - (x.z)z
vectorX[0] = 1.0f;
vectorX[1] = 0.0f;
vectorX[2] = 0.0f;
vectorY[0] = 0.0f;
vectorY[1] = 1.0f;
vectorY[2] = 0.0f;
vectorZ[0] = 0.0f;
vectorZ[1] = 0.0f;
vectorZ[2] = 1.0f;
}
unsigned int offset = 3*particleIndex;
float dot = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
float molDipole[3];
molDipole[0] = labFrameDipole[offset];
molDipole[1] = labFrameDipole[offset+1];
molDipole[2] = labFrameDipole[offset+2];
vectorX[0] -= dot*vectorZ[0];
vectorX[1] -= dot*vectorZ[1];
vectorX[2] -= dot*vectorZ[2];
// set out-of-range elements to 0.0f
sum = normVector3( vectorX );
vectorY[0] = (vectorZ[1]*vectorX[2]) - (vectorZ[2]*vectorX[1]);
vectorY[1] = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
vectorY[2] = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
// use identity rotation matrix for unrecognized axis types
labFrameDipole[offset] = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
labFrameDipole[offset+1] = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
labFrameDipole[offset+2] = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
// ---------------------------------------------------------------------------------------
float mPole[3][3];
offset = 9*particleIndex;
mPole[0][0] = labFrameQuadrupole[offset];
mPole[0][1] = labFrameQuadrupole[offset+1];
mPole[0][2] = labFrameQuadrupole[offset+2];
mPole[1][0] = labFrameQuadrupole[offset+3];
mPole[1][1] = labFrameQuadrupole[offset+4];
mPole[1][2] = labFrameQuadrupole[offset+5];
mPole[2][0] = labFrameQuadrupole[offset+6];
mPole[2][1] = labFrameQuadrupole[offset+7];
mPole[2][2] = labFrameQuadrupole[offset+8];
labFrameQuadrupole[offset+8] = vectorX[2]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+8] += vectorY[2]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+8] += vectorZ[2]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
if( axisType < 0 || axisType > 4 ){
labFrameQuadrupole[offset+4] = vectorX[1]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
labFrameQuadrupole[offset+4] += vectorY[1]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
labFrameQuadrupole[offset+4] += vectorZ[1]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
vectorX[0] = 1.0f;
vectorX[1] = 0.0f;
vectorX[2] = 0.0f;
labFrameQuadrupole[offset+5] = vectorX[1]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+5] += vectorY[1]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+5] += vectorZ[1]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
vectorY[0] = 0.0f;
vectorY[1] = 1.0f;
vectorY[2] = 0.0f;
labFrameQuadrupole[offset] = vectorX[0]*(vectorX[0]*mPole[0][0] + vectorY[0]*mPole[0][1] + vectorZ[0]*mPole[0][2]);
labFrameQuadrupole[offset] += vectorY[0]*(vectorX[0]*mPole[1][0] + vectorY[0]*mPole[1][1] + vectorZ[0]*mPole[1][2]);
labFrameQuadrupole[offset] += vectorZ[0]*(vectorX[0]*mPole[2][0] + vectorY[0]*mPole[2][1] + vectorZ[0]*mPole[2][2]);
vectorZ[0] = 0.0f;
vectorZ[1] = 0.0f;
vectorZ[2] = 1.0f;
}
labFrameQuadrupole[offset+1] = vectorX[0]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
labFrameQuadrupole[offset+1] += vectorY[0]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
labFrameQuadrupole[offset+1] += vectorZ[0]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
float molDipole[3];
molDipole[0] = labFrameDipole[particleIndex*3];
molDipole[1] = labFrameDipole[particleIndex*3+1];
molDipole[2] = labFrameDipole[particleIndex*3+2];
// set out-of-range elements to 0.0f
labFrameQuadrupole[offset+2] = vectorX[0]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+2] += vectorY[0]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+2] += vectorZ[0]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
labFrameDipole[particleIndex*3] = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
labFrameDipole[particleIndex*3+1] = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
labFrameDipole[particleIndex*3+2] = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
// ---------------------------------------------------------------------------------------
float mPole[3][3];
unsigned int offset = particleIndex*9;
mPole[0][0] = labFrameQuadrupole[offset];
mPole[0][1] = labFrameQuadrupole[offset+1];
mPole[0][2] = labFrameQuadrupole[offset+2];
labFrameQuadrupole[offset+3] = labFrameQuadrupole[offset+1];
labFrameQuadrupole[offset+6] = labFrameQuadrupole[offset+2];
labFrameQuadrupole[offset+7] = labFrameQuadrupole[offset+5];
mPole[1][0] = labFrameQuadrupole[offset+3];
mPole[1][1] = labFrameQuadrupole[offset+4];
mPole[1][2] = labFrameQuadrupole[offset+5];
mPole[2][0] = labFrameQuadrupole[offset+6];
mPole[2][1] = labFrameQuadrupole[offset+7];
mPole[2][2] = labFrameQuadrupole[offset+8];
labFrameQuadrupole[offset+8] = vectorX[2]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+8] += vectorY[2]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+8] += vectorZ[2]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
labFrameQuadrupole[offset+4] = vectorX[1]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
labFrameQuadrupole[offset+4] += vectorY[1]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
labFrameQuadrupole[offset+4] += vectorZ[1]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
labFrameQuadrupole[offset+5] = vectorX[1]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+5] += vectorY[1]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+5] += vectorZ[1]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
labFrameQuadrupole[offset] = vectorX[0]*(vectorX[0]*mPole[0][0] + vectorY[0]*mPole[0][1] + vectorZ[0]*mPole[0][2]);
labFrameQuadrupole[offset] += vectorY[0]*(vectorX[0]*mPole[1][0] + vectorY[0]*mPole[1][1] + vectorZ[0]*mPole[1][2]);
labFrameQuadrupole[offset] += vectorZ[0]*(vectorX[0]*mPole[2][0] + vectorY[0]*mPole[2][1] + vectorZ[0]*mPole[2][2]);
labFrameQuadrupole[offset+1] = vectorX[0]*(vectorX[1]*mPole[0][0] + vectorY[1]*mPole[0][1] + vectorZ[1]*mPole[0][2]);
labFrameQuadrupole[offset+1] += vectorY[0]*(vectorX[1]*mPole[1][0] + vectorY[1]*mPole[1][1] + vectorZ[1]*mPole[1][2]);
labFrameQuadrupole[offset+1] += vectorZ[0]*(vectorX[1]*mPole[2][0] + vectorY[1]*mPole[2][1] + vectorZ[1]*mPole[2][2]);
labFrameQuadrupole[offset+2] = vectorX[0]*(vectorX[2]*mPole[0][0] + vectorY[2]*mPole[0][1] + vectorZ[2]*mPole[0][2]);
labFrameQuadrupole[offset+2] += vectorY[0]*(vectorX[2]*mPole[1][0] + vectorY[2]*mPole[1][1] + vectorZ[2]*mPole[1][2]);
labFrameQuadrupole[offset+2] += vectorZ[0]*(vectorX[2]*mPole[2][0] + vectorY[2]*mPole[2][1] + vectorZ[2]*mPole[2][2]);
labFrameQuadrupole[offset+3] = labFrameQuadrupole[offset+1];
labFrameQuadrupole[offset+6] = labFrameQuadrupole[offset+2];
labFrameQuadrupole[offset+7] = labFrameQuadrupole[offset+5];
}
particleIndex += gridDim.x*blockDim.x;
}
......@@ -384,28 +393,24 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if( 0 && amoebaGpu->log ){
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: numBlocks/atoms=%d\n", methodName, numBlocks ); (void) fflush( amoebaGpu->log );
amoebaGpu->psMultipoleParticlesIdsAndAxisType->Download();
amoebaGpu->psMolecularDipole->Download();
gpu->psPosq4->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
int mIndex = 3*ii;
(void) fprintf( amoebaGpu->log,"%6d [%6d %6d %6d] x[%16.9e %16.9e %16.9e] dpl[%16.9e %16.9e %16.9e]\nRot[%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n\n", ii,
(void) fprintf( amoebaGpu->log,"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s\n", ii,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].x,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].y,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].z,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].w,
gpu->psPosq4->_pSysData[ii].x,
gpu->psPosq4->_pSysData[ii].y,
gpu->psPosq4->_pSysData[ii].z,
amoebaGpu->psMolecularDipole->_pSysData[mIndex],
amoebaGpu->psMolecularDipole->_pSysData[mIndex+1],
amoebaGpu->psMolecularDipole->_pSysData[mIndex+2] );
if( ii == 30 )ii = gpu->natoms - 30;
gpu->psPosq4->_pSysData[ii].z, (amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].w > 1 ? " XXX" : "") );
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
// int64 kernelTime = AmoebaTiming::getTimeOfDay();
double kernelTime = 0.0;
#endif
// copy molecular moments to lab frame moment arrays
......@@ -431,7 +436,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaLabFrameMoments( amoebaGpu );
if( 1 ){
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
//fileId.push_back( 0 );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment