Commit b8e12104 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Cutoffs/PBC for Vdw force

parent 408469c3
...@@ -136,3 +136,8 @@ extern void SetCustomExternalGlobalParams(const std::vector<float>& paramValues) ...@@ -136,3 +136,8 @@ extern void SetCustomExternalGlobalParams(const std::vector<float>& paramValues)
extern void SetCustomNonbondedForceExpression(const Expression<256>& expression); extern void SetCustomNonbondedForceExpression(const Expression<256>& expression);
extern void SetCustomNonbondedEnergyExpression(const Expression<256>& expression); extern void SetCustomNonbondedEnergyExpression(const Expression<256>& expression);
extern void SetCustomNonbondedGlobalParams(const std::vector<float>& paramValues); extern void SetCustomNonbondedGlobalParams(const std::vector<float>& paramValues);
extern __global__ void OPENMMCUDA_EXPORT kFindBlockBoundsPeriodic_kernel( void );
extern __global__ void OPENMMCUDA_EXPORT kFindBlocksWithInteractionsPeriodic_kernel( void );
extern __global__ void OPENMMCUDA_EXPORT kFindInteractionsWithinBlocksPeriodic_kernel( unsigned int* workUnit );
...@@ -143,11 +143,41 @@ public: ...@@ -143,11 +143,41 @@ public:
*/ */
void getParticleExclusions( int particleIndex, std::vector< int >& exclusions ) const; void getParticleExclusions( int particleIndex, std::vector< int >& exclusions ) const;
/**
* Set cutoff
*
* @param cutoff cutoff
*/
void setCutoff( double cutoff );
/**
* Get cutoff
*
* @return cutoff
*/
double getCutoff( void ) const;
/**
* Set flag for employing periodic boundary conditions
*
* @param pbcFlag if nonozero, use periodic boundary conditions
*/
void setPBC( int pbcFlag );
/**
* Get periodic boundary conditions flag
*
* @return periodic boundary conditions flag (nonzero -> use PBC)
*/
int getPBC( void ) const;
protected: protected:
ForceImpl* createImpl(); ForceImpl* createImpl();
private: private:
class VdwInfo; class VdwInfo;
int usePBC;
double cutoff;
std::string sigmaCombiningRule; std::string sigmaCombiningRule;
std::string epsilonCombiningRule; std::string epsilonCombiningRule;
std::vector< std::vector<int> > exclusions; std::vector< std::vector<int> > exclusions;
...@@ -170,7 +200,7 @@ private: ...@@ -170,7 +200,7 @@ private:
class AmoebaVdwForce::VdwInfo { class AmoebaVdwForce::VdwInfo {
public: public:
int ivIndex, classIndex; int ivIndex, classIndex;
double reductionFactor, sigma, epsilon; double reductionFactor, sigma, epsilon, cutoff;
VdwInfo() { VdwInfo() {
ivIndex = classIndex = -1; ivIndex = classIndex = -1;
reductionFactor = 0.0; reductionFactor = 0.0;
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
using namespace OpenMM; using namespace OpenMM;
AmoebaVdwForce::AmoebaVdwForce() { AmoebaVdwForce::AmoebaVdwForce() : usePBC(0), cutoff(1.0e+10) {
} }
int AmoebaVdwForce::addParticle(int ivIndex, int classIndex, double sigma, double epsilon, double reductionFactor ) { int AmoebaVdwForce::addParticle(int ivIndex, int classIndex, double sigma, double epsilon, double reductionFactor ) {
...@@ -102,6 +102,22 @@ void AmoebaVdwForce::getParticleExclusions( int particleIndex, std::vector< int ...@@ -102,6 +102,22 @@ void AmoebaVdwForce::getParticleExclusions( int particleIndex, std::vector< int
} }
void AmoebaVdwForce::setCutoff( double inputCutoff ){
cutoff = inputCutoff;
}
double AmoebaVdwForce::getCutoff( void ) const {
return cutoff;
}
void AmoebaVdwForce::setPBC( int pbcFlag ){
usePBC = pbcFlag;
}
int AmoebaVdwForce::getPBC( void ) const {
return usePBC;
}
ForceImpl* AmoebaVdwForce::createImpl() { ForceImpl* AmoebaVdwForce::createImpl() {
return new AmoebaVdwForceImpl(*this); return new AmoebaVdwForceImpl(*this);
} }
...@@ -954,7 +954,7 @@ static void computeAmoebaVdwForce( AmoebaCudaData& data ) { ...@@ -954,7 +954,7 @@ static void computeAmoebaVdwForce( AmoebaCudaData& data ) {
// Vdw14_7F // Vdw14_7F
kCalculateAmoebaVdw14_7Forces(gpu); kCalculateAmoebaVdw14_7Forces(gpu, data.getApplyCutoff());
} }
class CudaCalcAmoebaVdwForceKernel::ForceInfo : public CudaForceInfo { class CudaCalcAmoebaVdwForceKernel::ForceInfo : public CudaForceInfo {
...@@ -1014,7 +1014,7 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba ...@@ -1014,7 +1014,7 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
gpuSetAmoebaVdwParameters( data.getAmoebaGpu(), indexIVs, indexClasses, sigmas, epsilons, reductions, gpuSetAmoebaVdwParameters( data.getAmoebaGpu(), indexIVs, indexClasses, sigmas, epsilons, reductions,
force.getSigmaCombiningRule(), force.getEpsilonCombiningRule(), force.getSigmaCombiningRule(), force.getEpsilonCombiningRule(),
allExclusions ); allExclusions, force.getPBC(), static_cast<float>(force.getCutoff()) );
data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force)); data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
} }
......
...@@ -339,8 +339,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log ) ...@@ -339,8 +339,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " maxCovalentDegreeSz %d\n", amoebaGpu->maxCovalentDegreeSz ); (void) fprintf( log, " maxCovalentDegreeSz %d\n", amoebaGpu->maxCovalentDegreeSz );
(void) fprintf( log, " solventDielectric %10.3f\n", amoebaGpu->solventDielectric); (void) fprintf( log, " solventDielectric %10.3f\n", amoebaGpu->solventDielectric);
(void) fprintf( log, " pGamma %10.3f\n", amoebaGpu->pGamma );
(void) fprintf( log, " scalingDistanceCutoff %10.3f\n", amoebaGpu->scalingDistanceCutoff );
(void) fprintf( log, " scalingDistanceCutoff %15.7e\n", amoebaGpu->amoebaSim.scalingDistanceCutoff ); (void) fprintf( log, " scalingDistanceCutoff %15.7e\n", amoebaGpu->amoebaSim.scalingDistanceCutoff );
(void) fprintf( log, " pDampingFactorAndThole %p\n", amoebaGpu->amoebaSim.pDampingFactorAndThole ); (void) fprintf( log, " pDampingFactorAndThole %p\n", amoebaGpu->amoebaSim.pDampingFactorAndThole );
(void) fprintf( log, " pScaleIndicesIndex %p\n", amoebaGpu->amoebaSim.pScaleIndicesIndex ); (void) fprintf( log, " pScaleIndicesIndex %p\n", amoebaGpu->amoebaSim.pScaleIndicesIndex );
...@@ -384,7 +382,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log ) ...@@ -384,7 +382,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat( amoebaGpu->psWorkVector[0], log ); gpuPrintCudaStreamFloat( amoebaGpu->psWorkVector[0], log );
gpuPrintCudaStreamFloat( amoebaGpu->psForce, log ); gpuPrintCudaStreamFloat( amoebaGpu->psForce, log );
gpuPrintCudaStreamFloat( amoebaGpu->psTorque, log ); gpuPrintCudaStreamFloat( amoebaGpu->psTorque, log );
gpuPrintCudaStreamFloat( amoebaGpu->psEnergy, log );
gpuPrintCudaStreamFloat( amoebaGpu->torqueMapForce, log ); gpuPrintCudaStreamFloat( amoebaGpu->torqueMapForce, log );
(void) fprintf( log, " maxMapTorqueDifference %d\n", amoebaGpu->maxMapTorqueDifference ); (void) fprintf( log, " maxMapTorqueDifference %d\n", amoebaGpu->maxMapTorqueDifference );
...@@ -404,12 +401,11 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log ) ...@@ -404,12 +401,11 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, "\n" ); (void) fprintf( log, "\n" );
(void) fprintf( log, " useVdwTable %u\n", amoebaGpu->useVdwTable);
(void) fprintf( log, " vdwTableSize %u\n", amoebaGpu->vdwTableSize);
(void) fprintf( log, " vdwSigmaCombiningRule %d\n", amoebaGpu->vdwSigmaCombiningRule); (void) fprintf( log, " vdwSigmaCombiningRule %d\n", amoebaGpu->vdwSigmaCombiningRule);
(void) fprintf( log, " vdwEpsilonCombiningRule %d\n", amoebaGpu->vdwEpsilonCombiningRule); (void) fprintf( log, " vdwEpsilonCombiningRule %d\n", amoebaGpu->vdwEpsilonCombiningRule);
(void) fprintf( log, " vdwUsePBC %d\n", amoebaGpu->amoebaSim.vdwUsePBC);
(void) fprintf( log, " vdwCutoff2 %15.7e\n", amoebaGpu->amoebaSim.vdwCutoff2);
gpuPrintCudaStreamFloat2( amoebaGpu->psVdwSigmaEpsilon, log ); gpuPrintCudaStreamFloat2( amoebaGpu->psVdwSigmaEpsilon, log );
gpuPrintCudaStreamFloat2( amoebaGpu->psVdwTable, log );
gpuPrintCudaStreamInt( amoebaGpu->psAmoebaVdwNonReductionID, log ); gpuPrintCudaStreamInt( amoebaGpu->psAmoebaVdwNonReductionID, log );
gpuPrintCudaStreamInt4( amoebaGpu->psAmoebaVdwReductionID, log ); gpuPrintCudaStreamInt4( amoebaGpu->psAmoebaVdwReductionID, log );
gpuPrintCudaStreamFloat( amoebaGpu->psAmoebaVdwReduction, log ); gpuPrintCudaStreamFloat( amoebaGpu->psAmoebaVdwReduction, log );
...@@ -1469,7 +1465,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect ...@@ -1469,7 +1465,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->maxCovalentDegreeSz = maxCovalentRange; amoebaGpu->maxCovalentDegreeSz = maxCovalentRange;
amoebaGpu->paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; amoebaGpu->paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
amoebaGpu->scalingDistanceCutoff = static_cast<float>(scalingDistanceCutoff);
gpuRotationToLabFrameAllocate( amoebaGpu ); gpuRotationToLabFrameAllocate( amoebaGpu );
gpuFixedEFieldAllocate( amoebaGpu ); gpuFixedEFieldAllocate( amoebaGpu );
...@@ -1708,11 +1703,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect ...@@ -1708,11 +1703,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// covalent/polarization degree // covalent/polarization degree
if( ii < 1 ){
(void) fprintf( amoebaGpu->log,"Gamma=%.5f scaledDistCutoff=%.5f\n",
amoebaGpu->pGamma, amoebaGpu->scalingDistanceCutoff );
}
(void) fprintf( amoebaGpu->log,"%3d covalent/polarization degree: minIdx[%6d %6d] Thole=%12.5f dampingFactor=%12.5f\n", ii, (void) fprintf( amoebaGpu->log,"%3d covalent/polarization degree: minIdx[%6d %6d] Thole=%12.5f dampingFactor=%12.5f\n", ii,
amoebaGpu->psCovalentDegree->_pSysStream[0][particlesOffset], amoebaGpu->psPolarizationDegree->_pSysStream[0][particlesOffset], amoebaGpu->psCovalentDegree->_pSysStream[0][particlesOffset], amoebaGpu->psPolarizationDegree->_pSysStream[0][particlesOffset],
amoebaGpu->psDampingFactorAndThole->_pSysStream[0][ii].y, amoebaGpu->psDampingFactorAndThole->_pSysStream[0][ii].x ); amoebaGpu->psDampingFactorAndThole->_pSysStream[0][ii].y, amoebaGpu->psDampingFactorAndThole->_pSysStream[0][ii].x );
...@@ -1884,7 +1874,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect ...@@ -1884,7 +1874,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// upload // upload
amoebaGpu->amoebaSim.scalingDistanceCutoff = amoebaGpu->scalingDistanceCutoff; amoebaGpu->amoebaSim.scalingDistanceCutoff = static_cast<float>(scalingDistanceCutoff);
amoebaGpu->amoebaSim.numberOfAtoms = amoebaGpu->gpuContext->natoms; amoebaGpu->amoebaSim.numberOfAtoms = amoebaGpu->gpuContext->natoms;
amoebaGpu->amoebaSim.paddedNumberOfAtoms = amoebaGpu->paddedNumberOfAtoms; amoebaGpu->amoebaSim.paddedNumberOfAtoms = amoebaGpu->paddedNumberOfAtoms;
...@@ -1992,7 +1982,8 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu, ...@@ -1992,7 +1982,8 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const std::vector<float>& reductions, const std::vector<float>& reductions,
const std::string& vdwSigmaCombiningRule, const std::string& vdwSigmaCombiningRule,
const std::string& vdwEpsilonCombiningRule, const std::string& vdwEpsilonCombiningRule,
const std::vector< std::vector<int> >& allExclusions ) const std::vector< std::vector<int> >& allExclusions,
int usePBC, float cutoff )
{ {
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -2000,10 +1991,14 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu, ...@@ -2000,10 +1991,14 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
amoebaGpu->paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; amoebaGpu->paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
unsigned int particles = sigmas.size(); unsigned int particles = sigmas.size();
amoebaGpu->amoebaSim.vdwUsePBC = usePBC;
amoebaGpu->amoebaSim.vdwCutoff2 = cutoff*cutoff;
// set sigma combining rule flag // set sigma combining rule flag
if( vdwSigmaCombiningRule.compare( "ARITHMETIC" ) == 0 ){ if( vdwSigmaCombiningRule.compare( "ARITHMETIC" ) == 0 ){
...@@ -2157,13 +2152,12 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu, ...@@ -2157,13 +2152,12 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
psVdwReductionID->Upload(); psVdwReductionID->Upload();
psAmoebaVdwReduction->Upload(); psAmoebaVdwReduction->Upload();
amoebaGpuBuildOutputBuffers( amoebaGpu );
amoebaGpuBuildVdwExclusionList( amoebaGpu, allExclusions ); amoebaGpuBuildVdwExclusionList( amoebaGpu, allExclusions );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
unsigned int maxPrint = 32; unsigned int maxPrint = 32;
(void) fprintf( amoebaGpu->log, "%s useVdwTable=%d size=%d\n", methodName, amoebaGpu->useVdwTable,
(amoebaGpu->useVdwTable ? amoebaGpu->vdwTableSize : 0) );
(void) fprintf( amoebaGpu->log, "%s sigma/epsilon combining rules=%d %d\n", methodName, (void) fprintf( amoebaGpu->log, "%s sigma/epsilon combining rules=%d %d\n", methodName,
amoebaGpu->vdwSigmaCombiningRule, amoebaGpu->vdwEpsilonCombiningRule); amoebaGpu->vdwSigmaCombiningRule, amoebaGpu->vdwEpsilonCombiningRule);
for (unsigned int ii = 0; ii < gpu->natoms; ii++) for (unsigned int ii = 0; ii < gpu->natoms; ii++)
...@@ -2659,7 +2653,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu) ...@@ -2659,7 +2653,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
delete gpu->psWorkVector[3]; delete gpu->psWorkVector[3];
delete gpu->psForce; delete gpu->psForce;
delete gpu->psTorque; delete gpu->psTorque;
delete gpu->psEnergy;
delete gpu->torqueMapForce; delete gpu->torqueMapForce;
delete gpu->psGk_Field; delete gpu->psGk_Field;
...@@ -2671,7 +2664,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu) ...@@ -2671,7 +2664,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
delete gpu->psKirkwoodEDiffForce; delete gpu->psKirkwoodEDiffForce;
delete gpu->psVdwSigmaEpsilon; delete gpu->psVdwSigmaEpsilon;
delete gpu->psVdwTable;
delete gpu->psAmoebaVdwNonReductionID; delete gpu->psAmoebaVdwNonReductionID;
delete gpu->psAmoebaVdwReductionID; delete gpu->psAmoebaVdwReductionID;
delete gpu->psAmoebaVdwReduction; delete gpu->psAmoebaVdwReduction;
...@@ -2768,7 +2760,12 @@ extern "C" ...@@ -2768,7 +2760,12 @@ extern "C"
void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu ) void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu )
{ {
if( amoebaGpu->nonbondBlocks == amoebaGpu->gpuContext->sim.blocks ){
return;
}
unsigned int paddedNumberOfAtoms = amoebaGpu->paddedNumberOfAtoms; unsigned int paddedNumberOfAtoms = amoebaGpu->paddedNumberOfAtoms;
amoebaGpu->nonbondBlocks = amoebaGpu->gpuContext->sim.blocks; amoebaGpu->nonbondBlocks = amoebaGpu->gpuContext->sim.blocks;
amoebaGpu->threadsPerBlock = amoebaGpu->gpuContext->sim.threads_per_block; amoebaGpu->threadsPerBlock = amoebaGpu->gpuContext->sim.threads_per_block;
...@@ -2810,6 +2807,15 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu ) ...@@ -2810,6 +2807,15 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu )
amoebaGpu->fieldReduceThreadsPerBlock ); amoebaGpu->fieldReduceThreadsPerBlock );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
} }
if( amoebaGpu->psWorkArray_3_1 ){
delete amoebaGpu->psWorkArray_3_1;
delete amoebaGpu->psWorkArray_3_2;
delete amoebaGpu->psWorkArray_3_3;
delete amoebaGpu->psWorkArray_3_4;
delete amoebaGpu->psWorkArray_1_1;
delete amoebaGpu->psWorkArray_1_2;
}
amoebaGpu->psWorkArray_3_1 = new CUDAStream<float>(3*paddedNumberOfAtoms, (amoebaGpu->outputBuffers), "AmoebaField_3_1"); amoebaGpu->psWorkArray_3_1 = new CUDAStream<float>(3*paddedNumberOfAtoms, (amoebaGpu->outputBuffers), "AmoebaField_3_1");
amoebaGpu->amoebaSim.pWorkArray_3_1 = amoebaGpu->psWorkArray_3_1->_pDevStream[0]; amoebaGpu->amoebaSim.pWorkArray_3_1 = amoebaGpu->psWorkArray_3_1->_pDevStream[0];
...@@ -2826,8 +2832,6 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu ) ...@@ -2826,8 +2832,6 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu )
amoebaGpu->psWorkArray_1_2 = new CUDAStream<float>( paddedNumberOfAtoms, (amoebaGpu->outputBuffers), "AmoebaField_1_2"); amoebaGpu->psWorkArray_1_2 = new CUDAStream<float>( paddedNumberOfAtoms, (amoebaGpu->outputBuffers), "AmoebaField_1_2");
amoebaGpu->amoebaSim.pWorkArray_1_2 = amoebaGpu->psWorkArray_1_2->_pDevStream[0]; amoebaGpu->amoebaSim.pWorkArray_1_2 = amoebaGpu->psWorkArray_1_2->_pDevStream[0];
amoebaGpu->psEnergy = new CUDAStream<float>(amoebaGpu->energyOutputBuffers, 1, "AmoebaEnergy");
return; return;
} }
...@@ -2928,87 +2932,23 @@ int amoebaGpuBuildThreadBlockWorkList( amoebaGpuContext amoebaGpu ) ...@@ -2928,87 +2932,23 @@ int amoebaGpuBuildThreadBlockWorkList( amoebaGpuContext amoebaGpu )
CUDAStream<unsigned int>* psWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "WorkUnit"); CUDAStream<unsigned int>* psWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "WorkUnit");
unsigned int* pWorkList = psWorkUnit->_pSysData; unsigned int* pWorkList = psWorkUnit->_pSysData;
amoebaGpu->psWorkUnit = psWorkUnit; amoebaGpu->psWorkUnit = psWorkUnit;
memset( amoebaGpu->psWorkUnit->_pSysStream[0], 0, cells*sizeof( unsigned int) );
CUDAStream<unsigned int>* psVdwWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "VdwWorkUnit"); CUDAStream<unsigned int>* psVdwWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "VdwWorkUnit");
unsigned int* pVdwWorkList = psVdwWorkUnit->_pSysData; unsigned int* pVdwWorkList = psVdwWorkUnit->_pSysData;
amoebaGpu->amoebaSim.pVdwWorkUnit = psVdwWorkUnit->_pDevStream[0];
amoebaGpu->psVdwWorkUnit = psVdwWorkUnit; amoebaGpu->psVdwWorkUnit = psVdwWorkUnit;
memset( amoebaGpu->psVdwWorkUnit->_pSysStream[0], 0, cells*sizeof( unsigned int) );
/*
CUDAStream<unsigned int>* psInteractingWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "InteractingWorkUnit");
amoebaGpu->psInteractingWorkUnit = psInteractingWorkUnit;
amoebaGpu->workUnits = cells;
CUDAStream<unsigned int>* psInteractionFlag = new CUDAStream<unsigned int>(cells, 1u, "InteractionFlag");
amoebaGpu->psInteractionFlag = psInteractionFlag;
amoebaGpu->sim.pInteractionFlag = psInteractionFlag->_pDevStream[0];
CUDAStream<size_t>* psInteractionCount = new CUDAStream<size_t>(1, 1u, "InteractionCount");
amoebaGpu->psInteractionCount = psInteractionCount;
amoebaGpu->sim.pInteractionCount = psInteractionCount->_pDevStream[0];
CUDAStream<float4>* psGridBoundingBox = new CUDAStream<float4>(dim, 1u, "GridBoundingBox");
amoebaGpu->psGridBoundingBox = psGridBoundingBox;
amoebaGpu->sim.pGridBoundingBox = psGridBoundingBox->_pDevStream[0];
CUDAStream<float4>* psGridCenter = new CUDAStream<float4>(dim, 1u, "GridCenter");
amoebaGpu->psGridCenter = psGridCenter;
amoebaGpu->sim.pGridCenter = psGridCenter->_pDevStream[0];
amoebaGpu->sim.nonbond_workBlock = amoebaGpu->sim.nonbondThreadsPerBlock / GRID;
amoebaGpu->sim.bornForce2_workBlock = amoebaGpu->sim.bornForce2_threads_per_block / GRID;
amoebaGpu->sim.workUnits = cells;
*/
// Initialize the plan for doing stream compaction.
// planCompaction(amoebaGpu->compactPlan);
// Increase block count if necessary for extra large molecules that would
// otherwise overflow the SM workunit buffers
// int minimumBlocks = (cells + amoebaGpu->sim.workUnitsPerSM - 1) / amoebaGpu->sim.workUnitsPerSM;
// if ((int) amoebaGpu->sim.nonbond_blocks < minimumBlocks)
// {
// amoebaGpu->sim.nonbond_blocks = amoebaGpu->sim.nonbond_blocks * ((minimumBlocks + amoebaGpu->sim.nonbond_blocks - 1) / amoebaGpu->sim.nonbond_blocks);
// }
// if ((int) amoebaGpu->sim.bornForce2_blocks < minimumBlocks)
// {
// amoebaGpu->sim.bornForce2_blocks = amoebaGpu->sim.bornForce2_blocks * ((minimumBlocks + amoebaGpu->sim.bornForce2_blocks - 1) / amoebaGpu->sim.bornForce2_blocks);
// }
/*
amoebaGpu->sim.nbWorkUnitsPerBlock = cells / amoebaGpu->sim.nonbond_blocks;
amoebaGpu->sim.nbWorkUnitsPerBlockRemainder = cells - amoebaGpu->sim.nonbond_blocks * amoebaGpu->sim.nbWorkUnitsPerBlock;
amoebaGpu->sim.interaction_threads_per_block = 64;
amoebaGpu->sim.interaction_blocks = (amoebaGpu->workUnits + amoebaGpu->sim.interaction_threads_per_block - 1) / amoebaGpu->sim.interaction_threads_per_block;
if (amoebaGpu->sim.interaction_blocks > 8*amoebaGpu->sim.blocks)
amoebaGpu->sim.interaction_blocks = 8*amoebaGpu->sim.blocks;
if (activeWorkUnits > (int) cells)
{
int balancedWorkBlock = (cells + amoebaGpu->sim.nonbond_blocks - 1) / amoebaGpu->sim.nonbond_blocks;
amoebaGpu->sim.nonbondThreadsPerBlock = balancedWorkBlock * GRID;
amoebaGpu->sim.nonbond_workBlock = balancedWorkBlock;
}
activeWorkUnits = amoebaGpu->sim.bornForce2_blocks * amoebaGpu->sim.bornForce2_workBlock;
if (activeWorkUnits > (int) cells)
{
int balancedWorkBlock = (cells + amoebaGpu->sim.bornForce2_blocks - 1) / amoebaGpu->sim.bornForce2_blocks;
amoebaGpu->sim.bornForce2_threads_per_block = balancedWorkBlock * GRID;
amoebaGpu->sim.bornForce2_workBlock = balancedWorkBlock;
}
*/
unsigned int count = 0; unsigned int count = 0;
for (unsigned int y = 0; y < dim; y++) for (unsigned int y = 0; y < dim; y++)
{ {
for (unsigned int x = y; x < dim; x++) for (unsigned int x = y; x < dim; x++, count++)
{ {
pWorkList[count] = encodeCell( x, y ); pWorkList[count] = encodeCell( x, y );
pVdwWorkList[count] = encodeCell( x, y ); pVdwWorkList[count] = pWorkList[count];
count++;
} }
} }
//(*amoebaGpu->psInteractionCount)[0] = amoebaGpu->workUnits;
//amoebaGpu->psInteractionCount->Upload();
psWorkUnit->Upload(); psWorkUnit->Upload();
psVdwWorkUnit->Upload(); psVdwWorkUnit->Upload();
...@@ -3100,8 +3040,8 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu ) ...@@ -3100,8 +3040,8 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
CUDAStream<int>* psScalingIndicesIndex = new CUDAStream<int>(cells, 1u, "ScalingIndicesIndex"); CUDAStream<int>* psScalingIndicesIndex = new CUDAStream<int>(cells, 1u, "ScalingIndicesIndex");
amoebaGpu->psScalingIndicesIndex = psScalingIndicesIndex; amoebaGpu->psScalingIndicesIndex = psScalingIndicesIndex;
amoebaGpu->amoebaSim.pScaleIndicesIndex = psScalingIndicesIndex->_pDevStream[0]; amoebaGpu->amoebaSim.pScaleIndicesIndex = psScalingIndicesIndex->_pDevStream[0];
memset( amoebaGpu->psScalingIndicesIndex->_pSysStream[0], 0, cells*sizeof(unsigned int) );
memset( amoebaGpu->psScalingIndicesIndex->_pSysStream[0], 0, sizeof( cells)*sizeof( unsigned int) );
int numWithScalingIndices = 0; int numWithScalingIndices = 0;
int gridOffset = grid - 1; int gridOffset = grid - 1;
int lastBlock = (static_cast<int>(paddedAtoms) > amoebaGpu->gpuContext->natoms) ? (amoebaGpu->gpuContext->natoms)/grid : -1; int lastBlock = (static_cast<int>(paddedAtoms) > amoebaGpu->gpuContext->natoms) ? (amoebaGpu->gpuContext->natoms)/grid : -1;
...@@ -4308,19 +4248,17 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){ ...@@ -4308,19 +4248,17 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
void gpuCopyWorkUnit( amoebaGpuContext amoebaGpu ){ void gpuCopyWorkUnit( amoebaGpuContext amoebaGpu ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
/*
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
gpu->psInteractingWorkUnit->Download(); gpu->psInteractingWorkUnit->Download();
gpu->psWorkUnit->Download(); gpu->psWorkUnit->Download();
amoebaGpu->psWorkUnit->Download(); amoebaGpu->psWorkUnit->Download();
(void) fprintf( amoebaGpu->log, "gpuCopyInteractingWorkUnit called -- to be removed.\n" );
for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){ for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){
//gpu->psInteractingWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
gpu->psWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii]; gpu->psWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
} }
gpu->psInteractingWorkUnit->Upload(); gpu->psInteractingWorkUnit->Upload();
gpu->psWorkUnit->Upload(); gpu->psWorkUnit->Upload();
*/
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
......
...@@ -57,7 +57,7 @@ extern void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool per ...@@ -57,7 +57,7 @@ extern void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool per
extern void SetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu); extern void SetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu); extern void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu);
extern void kCalculateAmoebaVdw14_7Forces(amoebaGpuContext amoebaGpu ); extern void kCalculateAmoebaVdw14_7Forces(amoebaGpuContext amoebaGpu, int applyCutoff );
// wca dispersion // wca dispersion
......
...@@ -146,8 +146,11 @@ struct cudaAmoebaGmxSimulation { ...@@ -146,8 +146,11 @@ struct cudaAmoebaGmxSimulation {
float* pWorkArray_1_1; float* pWorkArray_1_1;
float* pWorkArray_1_2; float* pWorkArray_1_2;
int vdwUsePBC;
float vdwCutoff2;
unsigned int amoebaVdwNonReductions; unsigned int amoebaVdwNonReductions;
int* pAmoebaVdwNonReductionID; int* pAmoebaVdwNonReductionID;
unsigned int* pVdwWorkUnit;
unsigned int amoebaVdwReductions; unsigned int amoebaVdwReductions;
int4* pAmoebaVdwReductionID; int4* pAmoebaVdwReductionID;
......
...@@ -136,8 +136,6 @@ struct _amoebaGpuContext { ...@@ -136,8 +136,6 @@ struct _amoebaGpuContext {
// scaling-related parameters // scaling-related parameters
float pGamma;
float scalingDistanceCutoff;
CUDAStream<float2>* psDampingFactorAndThole; CUDAStream<float2>* psDampingFactorAndThole;
// slated for removal -- no longer used // slated for removal -- no longer used
...@@ -177,7 +175,6 @@ struct _amoebaGpuContext { ...@@ -177,7 +175,6 @@ struct _amoebaGpuContext {
CUDAStream<float>* psForce; CUDAStream<float>* psForce;
CUDAStream<float>* psTorque; CUDAStream<float>* psTorque;
CUDAStream<float>* psEnergy;
CUDAStream<float>* torqueMapForce; CUDAStream<float>* torqueMapForce;
int maxMapTorqueDifference; int maxMapTorqueDifference;
int maxMapTorqueDifferencePow2; int maxMapTorqueDifferencePow2;
...@@ -198,9 +195,6 @@ struct _amoebaGpuContext { ...@@ -198,9 +195,6 @@ struct _amoebaGpuContext {
CUDAStream<float2>* psVdwSigmaEpsilon; CUDAStream<float2>* psVdwSigmaEpsilon;
unsigned int useVdwTable;
unsigned int vdwTableSize;
CUDAStream<float2>* psVdwTable;
CUDAStream<int>* psAmoebaVdwNonReductionID; CUDAStream<int>* psAmoebaVdwNonReductionID;
CUDAStream<int4>* psAmoebaVdwReductionID; CUDAStream<int4>* psAmoebaVdwReductionID;
CUDAStream<float>* psAmoebaVdwReduction; CUDAStream<float>* psAmoebaVdwReduction;
...@@ -322,7 +316,7 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu, ...@@ -322,7 +316,7 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const std::vector<float>& reductions, const std::vector<float>& reductions,
const std::string& sigmaCombiningRule, const std::string& sigmaCombiningRule,
const std::string& epsilonCombiningRule, const std::string& epsilonCombiningRule,
const std::vector< std::vector<int> >& allExclusions ); const std::vector< std::vector<int> >& allExclusions, int usePBC, float cutoff );
extern "C" extern "C"
void gpuSetAmoebaPMEParameters(amoebaGpuContext amoebaGpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ); void gpuSetAmoebaPMEParameters(amoebaGpuContext amoebaGpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ);
......
...@@ -766,11 +766,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -766,11 +766,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d" (void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ\n", methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
methodName, gpu->natoms,
amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
amoebaGpu->scalingDistanceCutoff );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
......
...@@ -1861,10 +1861,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu ) ...@@ -1861,10 +1861,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d" (void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ\n", methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
methodName, gpu->natoms,
amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
amoebaGpu->scalingDistanceCutoff ); amoebaGpu->scalingDistanceCutoff );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
......
...@@ -1035,11 +1035,8 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu ) ...@@ -1035,11 +1035,8 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d" (void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ\n", methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
methodName, gpu->natoms,
amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
amoebaGpu->scalingDistanceCutoff );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
......
...@@ -1028,11 +1028,11 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP ...@@ -1028,11 +1028,11 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP #undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b #define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h" #include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
#define USE_OUTPUT_BUFFER_PER_WARP #define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME #undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b #define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h" #include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
// reduce psWorkArray_3_1 -> force // reduce psWorkArray_3_1 -> force
...@@ -1088,11 +1088,8 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1088,11 +1088,8 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d" (void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ\n", methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
methodName, gpu->natoms,
amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
amoebaGpu->scalingDistanceCutoff );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
...@@ -1142,26 +1139,26 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1142,26 +1139,26 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
maxThreads = 128; maxThreads = 128;
else else
maxThreads = 64; maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(PmeDirectElectrostaticParticle)+sizeof(float3)), maxThreads); threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(PmeDirectElectrostaticParticle)), maxThreads);
} }
kClearFields_3( amoebaGpu, 2 ); kClearFields_3( amoebaGpu, 2 );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaPmeDirectElectrostaticN2Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u\n", (void) fprintf( amoebaGpu->log, "kCalculateAmoebaPmeDirectElectrostaticCutoffForces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u\n",
threadsPerBlock, getThreadsPerBlock(amoebaGpu, sizeof(PmeDirectElectrostaticParticle)+sizeof(float3)), threadsPerBlock, getThreadsPerBlock(amoebaGpu, sizeof(PmeDirectElectrostaticParticle)),
(sizeof(PmeDirectElectrostaticParticle)+sizeof(float3)) ); sizeof(PmeDirectElectrostaticParticle) );
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaPmeDirectElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u\n", (void) fprintf( amoebaGpu->log, "kCalculateAmoebaPmeDirectElectrostaticCutoffForces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(PmeDirectElectrostaticParticle)+sizeof(float3), (sizeof(PmeDirectElectrostaticParticle)+sizeof(float3))*threadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits, sizeof(PmeDirectElectrostaticParticle), (sizeof(PmeDirectElectrostaticParticle))*threadsPerBlock,
gpu->sim.nonbond_threads_per_block ); amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits, gpu->sim.nonbond_threads_per_block );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#endif #endif
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaPmeDirectElectrostaticN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, (sizeof(PmeDirectElectrostaticParticle)+sizeof(float3))*threadsPerBlock>>>( kCalculateAmoebaPmeDirectElectrostaticCutoffByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(PmeDirectElectrostaticParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -1176,7 +1173,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1176,7 +1173,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
// gpu->sim.pInteractingWorkUnit, // gpu->sim.pInteractingWorkUnit,
// amoebaGpu->psWorkUnit->_pDevStream[0], // amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, (sizeof(PmeDirectElectrostaticParticle)+sizeof(float3))*threadsPerBlock>>>( kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(PmeDirectElectrostaticParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -1186,7 +1183,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1186,7 +1183,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
amoebaGpu->psWorkArray_3_2->_pDevStream[0] ); amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif #endif
} }
LAUNCHERROR("kCalculateAmoebaPmeDirectElectrostaticN2Forces"); LAUNCHERROR("kCalculateAmoebaPmeDirectElectrostaticCutoffForces");
kReduceForceTorque( amoebaGpu ); kReduceForceTorque( amoebaGpu );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
......
...@@ -373,11 +373,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -373,11 +373,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b #define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaPmeFixedEField.h" #include "kCalculateAmoebaCudaPmeFixedEField.h"
#define USE_OUTPUT_BUFFER_PER_WARP #define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME #undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b #define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaPmeFixedEField.h" #include "kCalculateAmoebaCudaPmeFixedEField.h"
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -443,7 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -443,7 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
} }
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>( kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -453,7 +453,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -453,7 +453,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu->psWorkArray_3_2->_pDevStream[0] ); amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif #endif
} else { } else {
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>( kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -473,7 +473,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -473,7 +473,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
(void) fprintf( amoebaGpu->log, "cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u\n", (void) fprintf( amoebaGpu->log, "cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u\n",
threadsPerBlock, getThreadsPerBlock(amoebaGpu, sizeof(FixedFieldParticle)+sizeof(float3)), threadsPerBlock, getThreadsPerBlock(amoebaGpu, sizeof(FixedFieldParticle)+sizeof(float3)),
(sizeof(FixedFieldParticle)+sizeof(float3)), (sizeof(FixedFieldParticle)+sizeof(float3))*threadsPerBlock ); (sizeof(FixedFieldParticle)+sizeof(float3)), (sizeof(FixedFieldParticle)+sizeof(float3))*threadsPerBlock );
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d\n", (void) fprintf( amoebaGpu->log, "AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers, sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits, gpu->bOutputBufferPerWarp ); (*gpu->psInteractionCount)[0], gpu->sim.workUnits, gpu->bOutputBufferPerWarp );
......
...@@ -218,11 +218,11 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce ...@@ -218,11 +218,11 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b #define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h" #include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP #define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME #undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b #define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h" #include "kCalculateAmoebaCudaPmeMutualInducedField.h"
__global__ __global__
...@@ -413,7 +413,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte ...@@ -413,7 +413,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
//gpu->sim.pInteractingWorkUnit, //gpu->sim.pInteractingWorkUnit,
//amoebaGpu->psWorkUnit->_pDevStream[0], //amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeMutualInducedFieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>( kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -426,14 +426,14 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte ...@@ -426,14 +426,14 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
} else { } else {
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "N2 no warp\n" ); (void) fprintf( amoebaGpu->log, "Cutoff no warp\n" );
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n", (void) fprintf( amoebaGpu->log, "AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(MutualInducedParticle), sizeof(MutualInducedParticle)*threadsPerBlock, sizeof(MutualInducedParticle), sizeof(MutualInducedParticle)*threadsPerBlock,
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#endif #endif
kCalculateAmoebaPmeMutualInducedFieldN2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>( kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "amoebaGpuTypes.h" #include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h" #include "amoebaCudaKernels.h"
#include "cudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h" #include "kCalculateAmoebaCudaUtilities.h"
#include "kCalculateAmoebaCudaVdwParticle.h" #include "kCalculateAmoebaCudaVdwParticle.h"
#include "amoebaScaleFactors.h" #include "amoebaScaleFactors.h"
...@@ -62,22 +63,6 @@ __device__ void loadVdw14_7Shared( struct Vdw14_7Particle* sA, unsigned int atom ...@@ -62,22 +63,6 @@ __device__ void loadVdw14_7Shared( struct Vdw14_7Particle* sA, unsigned int atom
} }
// load struct and arrays w/ shared data in sA
__device__ void loadVdw14_7Data( struct Vdw14_7Particle* sA,
float4* jCoord, float* jSigma, float* jEpsilon )
{
// load coordinates, sigma, epsilon
jCoord->x = sA->x;
jCoord->y = sA->y;
jCoord->z = sA->z;
*jSigma = sA->sigma;
*jEpsilon = sA->epsilon;
}
__device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule, float iSigma, float jSigma, float* combindedSigma, __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule, float iSigma, float jSigma, float* combindedSigma,
int epsilonCombiningRule, float iEpsilon, float jEpsilon, float* combindedEpsilon ) int epsilonCombiningRule, float iEpsilon, float jEpsilon, float* combindedEpsilon )
{ {
...@@ -106,15 +91,12 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule, ...@@ -106,15 +91,12 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
} }
__device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ, __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float combindedEpsilon,
float combindedSigma, float combindedEpsilon,
float force[3], float* energy float force[3], float* energy
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, float4* debugArray , float4* debugArray
#endif #endif
)
)
{ {
const float deltaHalM1 = 0.07f; const float deltaHalM1 = 0.07f;
...@@ -124,15 +106,15 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4 ...@@ -124,15 +106,15 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// get deltaR, and r between 2 atoms // on input force[i] is assummed to contain delta[i] for coordinates of atom I and J
force[0] = atomCoordinatesJ.x - atomCoordinatesI.x;
force[1] = atomCoordinatesJ.y - atomCoordinatesI.y;
force[2] = atomCoordinatesJ.z - atomCoordinatesI.z;
float rI = rsqrtf( force[0]*force[0] + force[1]*force[1] + force[2]*force[2] ); float r2 = force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
if( r2 > cAmoebaSim.vdwCutoff2 ){
*energy = force[0] = force[1] = force[2] = 0.0f;
return;
}
float rI = rsqrtf( r2 );
float r = 1.0f/rI; float r = 1.0f/rI;
float r2 = r*r;
float r6 = r2*r2*r2; float r6 = r2*r2*r2;
float r7 = r6*r; float r7 = r6*r;
...@@ -166,6 +148,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4 ...@@ -166,6 +148,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
debugArray[1].x = tau; debugArray[1].x = tau;
debugArray[1].y = rho; debugArray[1].y = rho;
debugArray[1].z = gTau; debugArray[1].z = gTau;
debugArray[1].w = r;
#endif #endif
} }
...@@ -424,6 +407,21 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA ...@@ -424,6 +407,21 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA
#undef METHOD_NAME #undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b #define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaVdw14_7.h" #include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef USE_CUTOFF
// reduce psWorkArray_3_1 -> outputArray // reduce psWorkArray_3_1 -> outputArray
...@@ -471,7 +469,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStr ...@@ -471,7 +469,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStr
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu ) void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff )
{ {
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -492,52 +490,122 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu ) ...@@ -492,52 +490,122 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu )
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms); memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload(); debugArray->Upload();
int targetAtom = 21; int targetAtom = 342;
#endif #endif
// clear output arrays
kClearFields_3( amoebaGpu, 1 );
// set threads/block first time through // set threads/block first time through
// on first pass, set threads/block
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
threadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(Vdw14_7Particle)); unsigned int maxThreads;
threadsPerBlock = 192; if (gpu->sm_version >= SM_20)
maxThreads = 384;
else if (gpu->sm_version >= SM_12)
maxThreads = 192;
else
maxThreads = 128;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle)), maxThreads);
} }
kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
if (gpu->bOutputBufferPerWarp){ #ifdef AMOEBA_DEBUG
#if 0 (void) fprintf( amoebaGpu->log, "Apply cutoff=%d warp=%d\n", applyCutoff, gpu->bOutputBufferPerWarp );
(void) fprintf( amoebaGpu->log, "N2 warp\n" ); (void) fflush( amoebaGpu->log ); (void) fprintf( amoebaGpu->log, "numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(Vdw14_7Particle), sizeof(Vdw14_7Particle)*threadsPerBlock,
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif
// clear output arrays
kClearFields_3( amoebaGpu, 1 );
kCalculateAmoebaVdw14_7N2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*amoebaGpu->nonbondThreadsPerBlock>>>( if( applyCutoff ){
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, amoebaGpu->amoebaSim.pVdwWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if( 0 ){
gpu->psInteractionCount->Download();
gpu->psInteractingWorkUnit->Download();
gpu->psInteractionFlag->Download();
amoebaGpu->psVdwWorkUnit->Download();
(void) fprintf( amoebaGpu->log, "Vdw Ixn count=%u\n", gpu->psInteractionCount->_pSysStream[0][0] );
for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){
unsigned int x = gpu->psInteractingWorkUnit->_pSysStream[0][ii];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
unsigned int exclusions = (x & 0x1);
x = (x >> 17) << GRIDBITS;
(void) fprintf( amoebaGpu->log, "GpuCell %8u %8u [%5u %5u %1u] %10u ", ii, gpu->psInteractingWorkUnit->_pSysStream[0][ii], x,y,exclusions, gpu->psInteractionFlag->_pSysStream[0][ii] );
x = amoebaGpu->psVdwWorkUnit->_pSysStream[0][ii];
y = ((x >> 2) & 0x7fff) << GRIDBITS;
exclusions = (x & 0x1);
x = (x >> 17) << GRIDBITS;
(void) fprintf( amoebaGpu->log, " AmGpu %8u [%5u %5u %1u]\n", amoebaGpu->psWorkUnit->_pSysStream[0][ii], x,y,exclusions );
}
(void) fflush( amoebaGpu->log );
}
amoebaGpu->psWorkUnit->_pDevStream[0], if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit,
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0], amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0], amoebaGpu->psVdwSigmaEpsilon->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0], amoebaGpu->vdwSigmaCombiningRule,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->vdwEpsilonCombiningRule,
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
debugArray->_pDevStream[0], targetAtom ); debugArray->_pDevStream[0], targetAtom );
#else #else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] ); amoebaGpu->psWorkArray_3_1->_pDevStream[0] );
#endif #endif
} else {
kCalculateAmoebaVdw14_7Cutoff_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit,
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
amoebaGpu->psVdwSigmaEpsilon->_pDevStream[0],
amoebaGpu->vdwSigmaCombiningRule,
amoebaGpu->vdwEpsilonCombiningRule,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
debugArray->_pDevStream[0], targetAtom );
#else
amoebaGpu->psWorkArray_3_1->_pDevStream[0] );
#endif #endif
}
LAUNCHERROR("kCalculateAmoebaVdw14_7Cutoff");
} else { } else {
if (gpu->bOutputBufferPerWarp){
//amoebaGpu->psVdwWorkUnit->_pDevStream[0],
kCalculateAmoebaVdw14_7N2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit,
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
amoebaGpu->psVdwSigmaEpsilon->_pDevStream[0],
amoebaGpu->vdwSigmaCombiningRule,
amoebaGpu->vdwEpsilonCombiningRule,
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "N2 no warp\n" ); amoebaGpu->psWorkArray_3_1->_pDevStream[0],
(void) fprintf( amoebaGpu->log, "numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n", debugArray->_pDevStream[0], targetAtom );
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, #else
sizeof(Vdw14_7Particle), sizeof(Vdw14_7Particle)*threadsPerBlock, amoebaGpu->psWorkArray_3_1->_pDevStream[0] );
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif #endif
} else {
kCalculateAmoebaVdw14_7N2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>( kCalculateAmoebaVdw14_7N2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
amoebaGpu->psVdwWorkUnit->_pDevStream[0], amoebaGpu->psVdwWorkUnit->_pDevStream[0],
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0], amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
...@@ -552,8 +620,8 @@ threadsPerBlock = 192; ...@@ -552,8 +620,8 @@ threadsPerBlock = 192;
#endif #endif
} }
LAUNCHERROR("kCalculateAmoebaVdw14_7N2");
LAUNCHERROR("kCalculateAmoebaVdw14_7"); }
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
......
...@@ -53,9 +53,8 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -53,9 +53,8 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
float4 jCoord; int exclusionIndex;
float jSigma; int exclusionMask;
float jEpsilon;
float totalEnergy = 0.0f; float totalEnergy = 0.0f;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -77,10 +76,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -77,10 +76,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
unsigned int tj = tgx; unsigned int tj = tgx;
Vdw14_7Particle* psA = &sA[tbx]; Vdw14_7Particle* psA = &sA[tbx];
Vdw14_7Particle localParticle;
unsigned int atomI = x + tgx; unsigned int atomI = x + tgx;
float4 iCoord = atomCoord[atomI]; loadVdw14_7Shared( &localParticle, atomI, atomCoord, vdwParameters );
float iSigma = vdwParameters[atomI].x;
float iEpsilon = vdwParameters[atomI].y;
float forceSum[3]; float forceSum[3];
...@@ -94,10 +93,12 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -94,10 +93,12 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
if (x == y) if (x == y)
{ {
if( bExclusionFlag ){
unsigned int xi = x >> GRIDBITS; unsigned int xi = x >> GRIDBITS;
unsigned int cell = xi + xi*cAmoebaSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2; unsigned int cell = xi + xi*cAmoebaSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
int exclusionIndex = cAmoebaSim.pVdwExclusionIndicesIndex[cell]+tgx; exclusionIndex = cAmoebaSim.pVdwExclusionIndicesIndex[cell]+tgx;
int exclusionMask = cAmoebaSim.pVdwExclusionIndices[exclusionIndex]; exclusionMask = cAmoebaSim.pVdwExclusionIndices[exclusionIndex];
}
// load shared data // load shared data
...@@ -108,30 +109,38 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -108,30 +109,38 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
float ijForce[3]; float ijForce[3];
// load coords, charge, ...
loadVdw14_7Data( &(psA[j]), &jCoord, &jSigma, &jEpsilon );
// get combined sigma and epsilon // get combined sigma and epsilon
float combindedSigma; float combindedSigma;
float combindedEpsilon; float combindedEpsilon;
getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, iSigma, jSigma, &combindedSigma, getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, localParticle.sigma, psA[j].sigma, &combindedSigma,
epsilonCombiningRule, iEpsilon, jEpsilon, &combindedEpsilon ); epsilonCombiningRule, localParticle.epsilon, psA[j].epsilon, &combindedEpsilon );
// calculate force // calculate force
ijForce[0] = psA[j].x - localParticle.x;
ijForce[1] = psA[j].y - localParticle.y;
ijForce[2] = psA[j].z - localParticle.z;
if( cAmoebaSim.vdwUsePBC )
{
ijForce[0] -= floor(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
ijForce[1] -= floor(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
ijForce[2] -= floor(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
}
float energy; float energy;
calculateVdw14_7PairIxn_kernel( iCoord, jCoord, combindedSigma, combindedEpsilon, ijForce, &energy calculateVdw14_7PairIxn_kernel( combindedSigma, combindedEpsilon, ijForce, &energy
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, pullDebug , pullDebug
#endif #endif
); );
// mask out excluded ixns // mask out excluded ixns
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+j) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
if( mask && bExclusionFlag ){
unsigned int maskIndex = 1 << j; unsigned int maskIndex = 1 << j;
unsigned int mask = ( (exclusionMask & maskIndex) || (atomI >= cAmoebaSim.numberOfAtoms) || ((y+j) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1; mask = (exclusionMask & maskIndex) ? 0 : 1;
}
// add to field at atomI the field due atomJ's dipole // add to field at atomI the field due atomJ's dipole
...@@ -152,7 +161,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){ ...@@ -152,7 +161,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = (float) x; debugArray[index].x = (float) x;
debugArray[index].y = (float) y; debugArray[index].y = (float) y;
debugArray[index].z = (float) cell+tgx; debugArray[index].z = (float) tgx;
debugArray[index].w = energy; debugArray[index].w = energy;
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
...@@ -188,10 +197,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){ ...@@ -188,10 +197,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
load3dArray( offset, forceSum, outputForce ); load3dArray( offset, forceSum, outputForce );
#endif #endif
} } else {
else
{
// Read fixed atom data into registers and GRF
if (lasty != y) if (lasty != y)
{ {
// load coordinates, charge, ... // load coordinates, charge, ...
...@@ -200,120 +207,51 @@ if( atomI == targetAtom || (y+j) == targetAtom ){ ...@@ -200,120 +207,51 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
} }
// zero shared fields #ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
zeroVdw14_7SharedForce( &(sA[threadIdx.x]) ); if (flags == 0) {
} else {
if( !bExclusionFlag )
{
for (unsigned int j = 0; j < GRID; j++)
{
float ijForce[3];
// load coords, charge, ...
loadVdw14_7Data( &(psA[tj]), &jCoord, &jSigma, &jEpsilon );
// get combined sigma and epsilon
float combindedSigma;
float combindedEpsilon;
getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, iSigma, jSigma, &combindedSigma,
epsilonCombiningRule, iEpsilon, jEpsilon, &combindedEpsilon );
// calculate force
float energy;
calculateVdw14_7PairIxn_kernel( iCoord, jCoord, combindedSigma, combindedEpsilon, ijForce, &energy
#ifdef AMOEBA_DEBUG
, pullDebug
#endif #endif
);
if( (atomI < cAmoebaSim.numberOfAtoms) && ((y+tj) < cAmoebaSim.numberOfAtoms) ){
// add to field at atomI the field due atomJ's dipole
forceSum[0] += ijForce[0];
forceSum[1] += ijForce[1];
forceSum[2] += ijForce[2];
// add to field at atomJ the field due atomI's dipole
psA[tj].force[0] -= ijForce[0];
psA[tj].force[1] -= ijForce[1];
psA[tj].force[2] -= ijForce[2];
totalEnergy += energy;
}
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+tj) == targetAtom ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = -2.0f;
debugArray[index].w = -1.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = (float) x;
debugArray[index].y = (float) y;
debugArray[index].z = -1.0f;
debugArray[index].w = energy;
index += cAmoebaSim.paddedNumberOfAtoms; // zero shared fields
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = pullDebug[0].w;
index += cAmoebaSim.paddedNumberOfAtoms; zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cAmoebaSim.paddedNumberOfAtoms; if( bExclusionFlag ){
debugArray[index].x = ijForce[0];
debugArray[index].y = ijForce[1];
debugArray[index].z = ijForce[2];
}
#endif
tj = (tj + 1) & (GRID - 1);
}
}
else
{
unsigned int xi = x >> GRIDBITS; unsigned int xi = x >> GRIDBITS;
unsigned int yi = y >> GRIDBITS; unsigned int yi = y >> GRIDBITS;
unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2; unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
int exclusionIndex = cAmoebaSim.pVdwExclusionIndicesIndex[cell]+tgx; exclusionIndex = cAmoebaSim.pVdwExclusionIndicesIndex[cell]+tgx;
int exclusionMask = cAmoebaSim.pVdwExclusionIndices[exclusionIndex]; exclusionMask = cAmoebaSim.pVdwExclusionIndices[exclusionIndex];
}
for (unsigned int j = 0; j < GRID; j++) for (unsigned int j = 0; j < GRID; j++)
{ {
float ijForce[3]; float ijForce[3];
// load coords, charge, ...
loadVdw14_7Data( &(psA[tj]), &jCoord, &jSigma, &jEpsilon );
// get combined sigma and epsilon // get combined sigma and epsilon
float combindedSigma; float combindedSigma;
float combindedEpsilon; float combindedEpsilon;
getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, iSigma, jSigma, &combindedSigma, getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, localParticle.sigma, psA[tj].sigma, &combindedSigma,
epsilonCombiningRule, iEpsilon, jEpsilon, &combindedEpsilon ); epsilonCombiningRule, localParticle.epsilon, psA[tj].epsilon, &combindedEpsilon );
// calculate force // calculate force
float energy; float energy;
calculateVdw14_7PairIxn_kernel( iCoord, jCoord, combindedSigma, combindedEpsilon, ijForce, &energy ijForce[0] = psA[tj].x - localParticle.x;
ijForce[1] = psA[tj].y - localParticle.y;
ijForce[2] = psA[tj].z - localParticle.z;
if( cAmoebaSim.vdwUsePBC )
{
ijForce[0] -= floor(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
ijForce[1] -= floor(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
ijForce[2] -= floor(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
}
calculateVdw14_7PairIxn_kernel( combindedSigma, combindedEpsilon, ijForce, &energy
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, pullDebug , pullDebug
#endif #endif
...@@ -321,8 +259,11 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){ ...@@ -321,8 +259,11 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
// mask out excluded ixns // mask out excluded ixns
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
if( mask && bExclusionFlag ){
unsigned int maskIndex = 1 << tj; unsigned int maskIndex = 1 << tj;
unsigned int mask = ( (exclusionMask & maskIndex) || (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1; mask = (exclusionMask & maskIndex) ? 0 : 1;
}
// accumulate force for atomI // accumulate force for atomI
...@@ -350,7 +291,7 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){ ...@@ -350,7 +291,7 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = (float) x; debugArray[index].x = (float) x;
debugArray[index].y = (float) y; debugArray[index].y = (float) y;
debugArray[index].z = (float) cell+tgx; debugArray[index].z = (float) tgx;
debugArray[index].w = energy; debugArray[index].w = energy;
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
...@@ -373,8 +314,10 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){ ...@@ -373,8 +314,10 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
#endif #endif
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
} } // end of j-loop
} #ifdef USE_CUTOFF
}
#endif
// Write results // Write results
...@@ -385,7 +328,6 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){ ...@@ -385,7 +328,6 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
offset = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms); offset = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].force, outputForce ); load3dArrayBufferPerWarp( offset, sA[threadIdx.x].force, outputForce );
#else #else
unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms); unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, forceSum, outputForce ); load3dArray( offset, forceSum, outputForce );
...@@ -395,7 +337,8 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){ ...@@ -395,7 +337,8 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
#endif #endif
lasty = y; lasty = y;
}
} // x == y block
pos++; pos++;
} }
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy; cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
//----------------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------------
#include "cudaKernels.h"
#include "amoebaCudaKernels.h" #include "amoebaCudaKernels.h"
#include <stdio.h> #include <stdio.h>
...@@ -353,13 +354,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu ) ...@@ -353,13 +354,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
} }
#undef USE_PERIODIC
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kFindInteractingBlocks.h"
#undef USE_PERIODIC
#undef METHOD_NAME
void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood ) void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood )
{ {
std::string methodName = "kCalculateAmoebaMultipoleForces"; std::string methodName = "kCalculateAmoebaMultipoleForces";
...@@ -384,7 +378,8 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG ...@@ -384,7 +378,8 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
LAUNCHERROR("kFindBlockBoundsPeriodic"); LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>(); kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic"); LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount); //compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, amoebaGpu->psWorkUnit->_pDevStream[0], gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic"); LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
......
...@@ -2460,7 +2460,7 @@ static int readAmoebaGeneralizedKirkwoodParameters( FILE* filePtr, MapStringInt& ...@@ -2460,7 +2460,7 @@ static int readAmoebaGeneralizedKirkwoodParameters( FILE* filePtr, MapStringInt&
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const StringVector& tokens, static int readAmoebaVdwParameters( FILE* filePtr, int version, MapStringInt& forceMap, const StringVector& tokens,
System& system, int useOpenMMUnits, System& system, int useOpenMMUnits,
MapStringVectorOfVectors& supplementary, MapStringVectorOfVectors& supplementary,
MapStringString& inputArgumentMap, int* lineCount, FILE* log ){ MapStringString& inputArgumentMap, int* lineCount, FILE* log ){
...@@ -2542,6 +2542,21 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const ...@@ -2542,6 +2542,21 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const
} }
} }
if( version > 1 ){
lineTokensT.resize(0);
isNotEof = readLine( filePtr, lineTokensT, lineCount, log );
if( lineTokensT[0] == "AmoebaVdw14_7Periodic" ){
int usePBC = atoi( lineTokensT[1].c_str() );
vdwForce->setPBC( usePBC );
}
lineTokensT.resize(0);
isNotEof = readLine( filePtr, lineTokensT, lineCount, log );
if( lineTokensT[0] == "AmoebaVdw14_7CutOff" ){
double cutoff = atof( lineTokensT[1].c_str() );
vdwForce->setCutoff( cutoff );
}
}
lineTokensT.resize(0); lineTokensT.resize(0);
isNotEof = readLine( filePtr, lineTokensT, lineCount, log ); isNotEof = readLine( filePtr, lineTokensT, lineCount, log );
if( lineTokensT[0] == "AmoebaVdw14_7Exclusion" ){ if( lineTokensT[0] == "AmoebaVdw14_7Exclusion" ){
...@@ -2621,6 +2636,8 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const ...@@ -2621,6 +2636,8 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const
methodName.c_str(), arraySize, (useOpenMMUnits ? "OpenMM" : "Amoeba"), methodName.c_str(), arraySize, (useOpenMMUnits ? "OpenMM" : "Amoeba"),
vdwForce->getSigmaCombiningRule().c_str(), vdwForce->getEpsilonCombiningRule().c_str() ); vdwForce->getSigmaCombiningRule().c_str(), vdwForce->getEpsilonCombiningRule().c_str() );
(void) fprintf( log, "use periodic boundary conditions=%d cutoff=%15.7e\n", vdwForce->getPBC(), vdwForce->getCutoff() );
for( int ii = 0; ii < vdwForce->getNumParticles(); ii++ ){ for( int ii = 0; ii < vdwForce->getNumParticles(); ii++ ){
int indexIV, indexClass; int indexIV, indexClass;
double sigma, epsilon, reduction; double sigma, epsilon, reduction;
...@@ -3669,7 +3686,7 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS ...@@ -3669,7 +3686,7 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS
// Amoeba Vdw // Amoeba Vdw
} else if( field == "AmoebaVdw14_7SigEpsTable" || field == "AmoebaVdw14_7Reduction" ){ } else if( field == "AmoebaVdw14_7SigEpsTable" || field == "AmoebaVdw14_7Reduction" ){
readAmoebaVdwParameters( filePtr, forceMap, tokens, system, useOpenMMUnits, supplementary, inputArgumentMap, &lineCount, log ); readAmoebaVdwParameters( filePtr, version, forceMap, tokens, system, useOpenMMUnits, supplementary, inputArgumentMap, &lineCount, log );
} else if( field == "AmoebaVdwForce" ){ } else if( field == "AmoebaVdwForce" ){
readVec3( filePtr, tokens, forces[AMOEBA_VDW_FORCE], &lineCount, field, log ); readVec3( filePtr, tokens, forces[AMOEBA_VDW_FORCE], &lineCount, field, log );
} else if( field == "AmoebaVdwEnergy" ){ } else if( field == "AmoebaVdwEnergy" ){
...@@ -4560,7 +4577,7 @@ void testUsingAmoebaTinkerParameterFile( const std::string& amoebaTinkerParamete ...@@ -4560,7 +4577,7 @@ void testUsingAmoebaTinkerParameterFile( const std::string& amoebaTinkerParamete
StringVector forceList; StringVector forceList;
std::string activeForceNames; std::string activeForceNames;
for( MapStringInt::const_iterator ii = forceMap.begin(); ii != forceMap.end(); ii++ ){ for( MapStringInt::const_iterator ii = forceMap.begin(); ii != forceMap.end(); ii++ ){
if( ii->second ){ if( ii->second && tinkerForces.find( ii->first ) != tinkerForces.end() ){
if( includeCavityTerm && ii->first == AMOEBA_GK_FORCE ){ if( includeCavityTerm && ii->first == AMOEBA_GK_FORCE ){
forceList.push_back( AMOEBA_GK_CAVITY_FORCE ); forceList.push_back( AMOEBA_GK_CAVITY_FORCE );
activeForceNames += AMOEBA_GK_CAVITY_FORCE + ":"; activeForceNames += AMOEBA_GK_CAVITY_FORCE + ":";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment