"devtools/ci/gh-actions/vscode:/vscode.git/clone" did not exist on "9fe1bae6efa18a55994522a8aac4f24338a2894e"
Commit 132a94bc authored by Mark Friedrichs
Browse files

Warp/non-warp calls were reversed in kCalculateAmoebaCudaKirkwood

parent 07f8d5ce
......@@ -793,6 +793,8 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
if( data.getHasAmoebaGeneralizedKirkwood() ){
kCalculateObcGbsaBornSum(gpu->gpuContext);
kReduceObcGbsaBornSum(gpu->gpuContext);
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psBornRadii, 0.1 );
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psObcChain, 0.0 );
}
// multipoles
......@@ -801,7 +803,6 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
//kClearForces(gpu->gpuContext);
//kClearEnergy(gpu->gpuContext);
//(void) fprintf( data.getLog(), "computeAmoebaMultipoleForce clearing forces/energy after kCalculateAmoebaMultipoleForces()\n" );
// GK
......
......@@ -191,12 +191,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, "\n\n" );
(void) fprintf( log, " gpuContext %p\n", amoebaGpu->gpuContext );
(void) fprintf( log, " log %p\n", amoebaGpu->log );
(void) fprintf( log, " log %p %s\n", amoebaGpu->log, amoebaGpu->log == stderr ? "is stderr" : "is not stderr");
(void) fprintf( log, " sm_version %u\n", gpu->sm_version );
(void) fprintf( log, " device %u\n", gpu->device );
(void) fprintf( log, " sharedMemoryPerBlock %u\n", gpu->sharedMemoryPerBlock );
(void) fprintf( log, " pMapArray %p\n", amoebaGpu->pMapArray );
(void) fprintf( log, " dMapArray %p\n", amoebaGpu->dMapArray );
(void) fprintf( log, " bOutputBufferPerWarp %d\n", amoebaGpu->bOutputBufferPerWarp );
(void) fprintf( log, " paddedNumberOfAtoms %u\n", amoebaGpu->paddedNumberOfAtoms );
(void) fprintf( log, " nonbondBlocks %u\n", amoebaGpu->nonbondBlocks );
......@@ -209,6 +207,13 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " outputBuffers %u\n", amoebaGpu->outputBuffers );
(void) fprintf( log, " workUnits %u\n", amoebaGpu->workUnits );
gpuPrintCudaStreamFloat( amoebaGpu->gpuContext->psEnergy, log );
gpuPrintCudaStreamFloat4( amoebaGpu->gpuContext->psForce4, log );
gpuPrintCudaStreamFloat4( amoebaGpu->gpuContext->psPosq4, log );
gpuPrintCudaStreamFloat2( amoebaGpu->gpuContext->psObcData, log );
gpuPrintCudaStreamFloat( amoebaGpu->gpuContext->psBornForce, log );
(void) fprintf( log, "\n\n" );
(void) fprintf( log, " amoebaBonds %u\n", amoebaGpu->amoebaSim.amoebaBonds );
gpuPrintCudaStreamFloat( amoebaGpu->psWorkArray_3_1, log );
gpuPrintCudaStreamFloat( amoebaGpu->psWorkArray_3_2, log );
gpuPrintCudaStreamFloat( amoebaGpu->psWorkArray_3_3, log );
......@@ -337,6 +342,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " quartic %15.7e\n", amoebaGpu->amoebaSim.amoebaUreyBradleyQuarticicParameter);
(void) fprintf( log, " pAmoebaUreyBradleyID %p\n", amoebaGpu->amoebaSim.pAmoebaUreyBradleyID );
(void) fprintf( log, " pAmoebaUreyBradleyParameter %p\n", amoebaGpu->amoebaSim.pAmoebaUreyBradleyParameter );
(void) fprintf( log, "\n\n" );
// if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
// gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
......@@ -394,7 +400,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat( amoebaGpu->psInducedDipole, log );
gpuPrintCudaStreamFloat( amoebaGpu->psInducedDipolePolar, log );
gpuPrintCudaStreamFloat( amoebaGpu->psInducedDipolePolar, log );
gpuPrintCudaStreamFloat( amoebaGpu->psCurrentEpsilon, log );
(void) fprintf( log, " numberOfSorWorkVectors %u\n", amoebaGpu->numberOfSorWorkVectors);
......@@ -4437,3 +4442,32 @@ void gpuCopyWorkUnit( amoebaGpuContext amoebaGpu ){
}
#undef AMOEBA_DEBUG
/**---------------------------------------------------------------------------------------

   Assign a single value to the leading entries of a cuda float array and upload it

   The first numberOfParticles*entriesPerParticle entries of array->_pSysStream[0]
   are set to initValue; array->Upload() is then called (also when nothing was written).

   @param numberOfParticles        number of particles
   @param entriesPerParticle       number of entries stored per particle in the array
   @param array                    cuda array to fill
   @param initValue                value assigned to every entry

   --------------------------------------------------------------------------------------- */

void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle,
                               CUDAStream<float>* array, float initValue )
{
    // entries are laid out particle after particle, so a single running index
    // visits exactly the slots entriesPerParticle*particle + entry, in order

    int entryIndex = 0;
    for( int particle = 0; particle < numberOfParticles; ++particle ){
        for( int entry = 0; entry < entriesPerParticle; ++entry, ++entryIndex ){
            array->_pSysStream[0][entryIndex] = initValue;
        }
    }

    // push the freshly written values through the stream's Upload()

    array->Upload();
}
......@@ -151,6 +151,8 @@ extern void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticl
// External declarations of helper routines operating on CUDAStream arrays.
// NOTE(review): the cudaLoad*/cudaWrite* routines presumably copy array contents into
// outputVector / write outputVector to the file fname -- confirm against their definitions.
extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float2>* array, VectorOfDoubleVectors& outputVector );
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order );
extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
// Sets numberOfParticles*entriesPerParticle entries of array->_pSysStream[0] to initValue,
// then calls array->Upload().
extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue );
// NOTE(review): presumably clear `entries` elements of fieldToClear -- confirm against the definitions.
extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );
extern void kClearFloat4( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float4>* fieldToClear );
......
......@@ -1907,7 +1907,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kClearFields_3( amoebaGpu, 6 );
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaKirkwoodN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(KirkwoodParticle)*threadsPerBlock>>>(
kCalculateAmoebaCudaKirkwoodN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(KirkwoodParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0]
#ifdef AMOEBA_DEBUG
, debugArray->_pDevStream[0], targetAtom );
......@@ -1924,7 +1924,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
(void) fflush( amoebaGpu->log );
#endif
kCalculateAmoebaCudaKirkwoodN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(KirkwoodParticle)*threadsPerBlock>>>(
kCalculateAmoebaCudaKirkwoodN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(KirkwoodParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0]
#ifdef AMOEBA_DEBUG
, debugArray->_pDevStream[0], targetAtom );
......
......@@ -1059,6 +1059,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
threadsPerBlock = std::min(getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle)), maxThreads);
}
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log && timestep == 1 ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces: blocks=%u threads=%u bffr/Warp=%u atm=%lu shrd=%lu"
" Ebuf=%u ixnCt=%lu workUnits=%u sm=%d device=%d sharedMemoryPerBlock=%u\n",
......@@ -1068,6 +1069,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
//gpuPrintCudaAmoebaGmxSimulation(amoebaGpu, amoebaGpu->log );
(void) fflush( amoebaGpu->log );
}
#endif
if (gpu->bOutputBufferPerWarp){
......
......@@ -502,8 +502,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
int targetAtom = 0;
static const char* methodName = "cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply";
if( 1 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: scalingDistanceCutoff=%.5f\n",
methodName, amoebaGpu->scalingDistanceCutoff );
(void) fprintf( amoebaGpu->log, "%s\n", methodName );
(void) fflush( amoebaGpu->log );
}
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
......@@ -583,7 +582,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
//printMiFieldBuffer( amoebaGpu, 37 );
//printMiFieldBuffer( amoebaGpu, 38 );
if( amoebaGpu->log && iteration == -1 ){
if( amoebaGpu->log && iteration == 1 ){
(void) fprintf( amoebaGpu->log, "Finished MI kernel execution %d\n", iteration ); (void) fflush( amoebaGpu->log );
......
......@@ -1390,6 +1390,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
......@@ -1413,7 +1414,7 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
zeroForce( amoebaGpu );
}
if( 1 ){
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
......
......@@ -565,7 +565,23 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if( hasAmoebaGeneralizedKirkwood ){
cudaComputeAmoebaFixedEAndGkFields( amoebaGpu );
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, 0.0 );
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, 0.0 );
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psGk_Field, 0.0 );
}
cudaComputeAmoebaMutualInducedAndGkField( amoebaGpu );
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, 0.0 );
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, 0.0 );
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipoleS, 0.0 );
initializeCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolarS, 0.0 );
amoebaGpu->mutualInducedDone = 1;
}
} else {
if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){
cudaComputeAmoebaFixedEField( amoebaGpu );
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment