"platforms/opencl/vscode:/vscode.git/clone" did not exist on "ca4c03c362446abaf66b832ddd500ec049b2ec60"
Commit 41abd9fb authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Fix for isolated ions

parent 761d7e17
......@@ -93,7 +93,7 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if( mapIterator == contextToAmoebaDataMap.end() ){
amoebaCudaData = new AmoebaCudaData( cudaPlatformData );
contextToAmoebaDataMap[&context] = amoebaCudaData;
amoebaCudaData->setLog( stderr );
//amoebaCudaData->setLog( stderr );
amoebaCudaData->setContextImpl( static_cast<void*>(&context) );
} else {
amoebaCudaData = mapIterator->second;
......
......@@ -86,13 +86,14 @@ amoebaGpuContext amoebaGpuInit( _gpuContext* gpu )
extern "C"
void gpuPrintCudaStream( std::string name,
unsigned int length, unsigned int subStreams, unsigned int stride,
unsigned int memoryFootprint,
void* pSysStream, void* pDevStream,
void* pSysData, void* pDevData, FILE* log)
{
(void) fprintf( log, " %-35s [%8u %5u %8u] Stream[%p %p] Data[%16p %16p]\n",
(void) fprintf( log, " %-35s [%8u %5u %8u %8u] Stream[%p %p] Data[%16p %16p]\n",
name.c_str(), length, subStreams,
stride, pSysStream, pDevStream, pSysData, pDevData );
stride, memoryFootprint, pSysStream, pDevStream, pSysData, pDevData );
}
extern "C"
......@@ -102,6 +103,7 @@ void gpuPrintCudaStreamFloat( CUDAStream<float>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -113,6 +115,7 @@ void gpuPrintCudaStreamFloat2( CUDAStream<float2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -124,6 +127,7 @@ void gpuPrintCudaStreamFloat4( CUDAStream<float4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -135,6 +139,7 @@ void gpuPrintCudaStreamUnsignedInt( CUDAStream<unsigned int>* cUDAStream, FILE*
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( unsigned int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -146,6 +151,7 @@ void gpuPrintCudaStreamInt( CUDAStream<int>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -157,6 +163,7 @@ void gpuPrintCudaStreamInt2( CUDAStream<int2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -168,6 +175,7 @@ void gpuPrintCudaStreamInt4( CUDAStream<int4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
}
......@@ -1277,15 +1285,6 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s: paddedNumberOfAtoms=%d\n",
methodName.c_str(), paddedNumberOfAtoms ); (void) fflush( amoebaGpu->log );
}
#endif
// work space
// parameters
amoebaGpu->psMultipoleParticlesIdsAndAxisType = new CUDAStream<int4>(paddedNumberOfAtoms, 1, "MultipoleParticlesIdsAndAxisType");
......@@ -1296,9 +1295,11 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->psMolecularDipole = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "MolecularDipole");
amoebaGpu->amoebaSim.pMolecularDipole = amoebaGpu->psMolecularDipole->_pDevData;
memset( amoebaGpu->psMolecularDipole->_pSysData, 0, sizeof(float)*3*paddedNumberOfAtoms );
amoebaGpu->psMolecularQuadrupole = new CUDAStream<float>(9*paddedNumberOfAtoms, 1, "MolecularQuadrupole");
amoebaGpu->amoebaSim.pMolecularQuadrupole = amoebaGpu->psMolecularQuadrupole->_pDevData;
memset( amoebaGpu->psMolecularQuadrupole->_pSysData, 0, sizeof(float)*9*paddedNumberOfAtoms );
// output
......
......@@ -47,38 +47,10 @@ typedef MapIntFloat::const_iterator MapIntFloatCI;
struct _amoebaGpuContext {
_gpuContext* gpuContext;
cudaAmoebaGmxSimulation amoebaSim;
FILE* log;
//bool bOutputBufferPerWarp;
//unsigned int paddedNumberOfAtoms;
//unsigned int nonbondBlocks;
//unsigned int nonbondThreadsPerBlock;
//unsigned int nonbondOutputBuffers;
//unsigned int threadsPerBlock;
//unsigned int fieldReduceThreadsPerBlock;
//unsigned int outputBuffers;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
cudaAmoebaGmxSimulation amoebaSim;
int maxCovalentDegreeSz;
CUDAStream<int4>* psAmoebaBondID;
CUDAStream<float2>* psAmoebaBondParameter;
......@@ -116,6 +88,25 @@ struct _amoebaGpuContext {
CUDAStream<int4>* psAmoebaTorsionTorsionID3;
CUDAStream<float4>* psAmoebaTorsionTorsionGrids;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
int maxCovalentDegreeSz;
float solventDielectric;
// multipole parameters
......@@ -126,7 +117,6 @@ struct _amoebaGpuContext {
// buffer indices used for mapping torques onto forces
int maxTorqueBufferIndex;
int useNewTorqueMapScheme;
int torqueMapForce4Delete;
CUDAStream<int4>* psMultipoleParticlesTorqueBufferIndices;
CUDAStream<float4>* psTorqueMapForce4;
......
......@@ -6,7 +6,8 @@
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
......@@ -480,7 +481,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
// write results to file
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......
......@@ -1730,7 +1730,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
}
LAUNCHERROR("kReduceToBornForcePrefactor");
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
......@@ -1748,7 +1748,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
}
(void) fflush( amoebaGpu->log );
*/
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......@@ -1943,7 +1943,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque );
if( 1 ){
if( 0 ){
std::vector<int> fileId;
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
......@@ -1956,7 +1956,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
if( 1 ){
if( 0 ){
std::vector<int> fileId;
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
......
......@@ -33,8 +33,8 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
RTERROR(status, "GetCalculateAmoebaCudaMutualInducedAndGkFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
#define AMOEBA_DEBUG
//#undef AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
......@@ -923,7 +923,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
}
#ifdef AMOEBA_DEBUG
if( 1 ){
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
......
......@@ -91,7 +91,7 @@ void kCudaComputeCheckChiral_kernel( void )
// skip z-then-x
int axisType = multiPoleParticles[particleIndex].w;
if( axisType != 0 && multiPoleParticles[particleIndex].y >= 0 )
if( axisType != 0 && multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].y >=0 && multiPoleParticles[particleIndex].z >= 0 )
{
// ---------------------------------------------------------------------------------------
......@@ -163,6 +163,9 @@ void kCudaComputeLabFrameMoments_kernel( void )
// code common to ZThenX and Bisector
while( particleIndex < numberOfParticles )
{
if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
{
float4 coordinatesThisParticle = particleCoord[particleIndex];
......@@ -244,6 +247,7 @@ void kCudaComputeLabFrameMoments_kernel( void )
// z-bisect
multipoleParticleIndex = multiPoleParticles[particleIndex].y;
if( multipoleParticleIndex >= 0 && multipoleParticleIndex < cSim.atoms ){
coordinatesAxisParticle = particleCoord[multipoleParticleIndex];
vectorY[0] = coordinatesAxisParticle.x - coordinatesThisParticle.x;
vectorY[1] = coordinatesAxisParticle.y - coordinatesThisParticle.y;
......@@ -268,6 +272,7 @@ void kCudaComputeLabFrameMoments_kernel( void )
vectorZ[2] += vectorX[2] + vectorY[2];
sum = normVector3( vectorZ );
}
}
} else if( axisType >= 4 ){
......@@ -307,21 +312,23 @@ void kCudaComputeLabFrameMoments_kernel( void )
vectorZ[2] = 1.0f;
}
unsigned int offset = 3*particleIndex;
float molDipole[3];
molDipole[0] = labFrameDipole[particleIndex*3];
molDipole[1] = labFrameDipole[particleIndex*3+1];
molDipole[2] = labFrameDipole[particleIndex*3+2];
molDipole[0] = labFrameDipole[offset];
molDipole[1] = labFrameDipole[offset+1];
molDipole[2] = labFrameDipole[offset+2];
// set out-of-range elements to 0.0f
labFrameDipole[particleIndex*3] = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
labFrameDipole[particleIndex*3+1] = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
labFrameDipole[particleIndex*3+2] = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
labFrameDipole[offset] = molDipole[0]*vectorX[0] + molDipole[1]*vectorY[0] + molDipole[2]*vectorZ[0];
labFrameDipole[offset+1] = molDipole[0]*vectorX[1] + molDipole[1]*vectorY[1] + molDipole[2]*vectorZ[1];
labFrameDipole[offset+2] = molDipole[0]*vectorX[2] + molDipole[1]*vectorY[2] + molDipole[2]*vectorZ[2];
// ---------------------------------------------------------------------------------------
float mPole[3][3];
unsigned int offset = particleIndex*9;
offset = 9*particleIndex;
mPole[0][0] = labFrameQuadrupole[offset];
mPole[0][1] = labFrameQuadrupole[offset+1];
......@@ -363,6 +370,8 @@ void kCudaComputeLabFrameMoments_kernel( void )
labFrameQuadrupole[offset+6] = labFrameQuadrupole[offset+2];
labFrameQuadrupole[offset+7] = labFrameQuadrupole[offset+5];
}
particleIndex += gridDim.x*blockDim.x;
}
......@@ -384,28 +393,24 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if( 0 && amoebaGpu->log ){
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: numBlocks/atoms=%d\n", methodName, numBlocks ); (void) fflush( amoebaGpu->log );
amoebaGpu->psMultipoleParticlesIdsAndAxisType->Download();
amoebaGpu->psMolecularDipole->Download();
gpu->psPosq4->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
int mIndex = 3*ii;
(void) fprintf( amoebaGpu->log,"%6d [%6d %6d %6d] x[%16.9e %16.9e %16.9e] dpl[%16.9e %16.9e %16.9e]\nRot[%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n\n", ii,
(void) fprintf( amoebaGpu->log,"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s\n", ii,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].x,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].y,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].z,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].w,
gpu->psPosq4->_pSysData[ii].x,
gpu->psPosq4->_pSysData[ii].y,
gpu->psPosq4->_pSysData[ii].z,
amoebaGpu->psMolecularDipole->_pSysData[mIndex],
amoebaGpu->psMolecularDipole->_pSysData[mIndex+1],
amoebaGpu->psMolecularDipole->_pSysData[mIndex+2] );
if( ii == 30 )ii = gpu->natoms - 30;
gpu->psPosq4->_pSysData[ii].z, (amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysData[ii].w > 1 ? " XXX" : "") );
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
// int64 kernelTime = AmoebaTiming::getTimeOfDay();
double kernelTime = 0.0;
#endif
// copy molecular moments to lab frame moment arrays
......@@ -431,7 +436,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaLabFrameMoments( amoebaGpu );
if( 1 ){
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
//fileId.push_back( 0 );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment