Commit 348a03e6 authored by one's avatar one
Browse files

Add experimental HIP PME fast grid level

Introduce OPENMM_HIP_PME_FAST_GRID_LEVEL as a HIP-only experimental
control for reducing automatically selected PME grid dimensions.  A
positive level repeatedly steps each grid dimension down to the previous
legal FFT size, while level 0 preserves the existing OpenMM behavior.

Keep explicit user-provided PME grid parameters unchanged so the feature
only affects automatic grid selection.  This is intended for performance
and accuracy exploration, not as a default tuning policy.
parent 97239ca6
...@@ -34,12 +34,52 @@ ...@@ -34,12 +34,52 @@
#include <algorithm> #include <algorithm>
#include <assert.h> #include <assert.h>
#include <cmath> #include <cmath>
#include <cstdlib>
#include <iterator> #include <iterator>
#include <set> #include <set>
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
namespace {
// Upper bound on the fast-grid level; larger requested values are clamped to it.
static const int MaxHipPmeFastGridLevel = 32;

/**
 * Read the experimental fast-grid level from the OPENMM_HIP_PME_FAST_GRID_LEVEL
 * environment variable.
 *
 * @param isHip  when false the environment variable is ignored entirely
 * @return 0 if isHip is false, the variable is unset, or its value is not a
 *         positive base-10 integer (optionally followed by whitespace);
 *         otherwise the parsed value clamped to MaxHipPmeFastGridLevel
 */
static int getHipPmeFastGridLevel(bool isHip) {
    if (!isHip)
        return 0;
    const char* value = getenv("OPENMM_HIP_PME_FAST_GRID_LEVEL");
    if (value == NULL)
        return 0;
    char* end = NULL;
    long level = strtol(value, &end, 10);
    // end == value means strtol() consumed no digits at all.
    if (end == value || level <= 0)
        return 0;
    // strtol() stops at the first character it cannot parse.  Tolerate trailing
    // whitespace, but treat anything else ("3abc", "1.5") as malformed instead
    // of silently using the numeric prefix.
    while (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')
        end++;
    if (*end != '\0')
        return 0;
    return static_cast<int>(std::min(level, static_cast<long>(MaxHipPmeFastGridLevel)));
}
/**
 * Tell whether a set of requested PME parameters is entirely unset.
 *
 * @return true only when alpha and all three grid sizes are zero; any nonzero
 *         value makes the request count as explicit
 */
static bool usesAutomaticPmeGrid(double alpha, int xsize, int ysize, int zsize) {
    if (alpha != 0.0)
        return false;
    if (xsize != 0 || ysize != 0 || zsize != 0)
        return false;
    return true;
}
/**
 * Search downward from size-1 for the nearest value that
 * cc.findLegalFFTDimension() returns unchanged.
 *
 * @param cc    context whose findLegalFFTDimension() is queried
 * @param size  starting dimension; the search begins strictly below it
 * @return the first such value found, or size itself if no positive value
 *         below it qualifies
 */
static int findPreviousLegalFFTDimension(ComputeContext& cc, int size) {
    int smaller = size;
    while (--smaller > 0) {
        if (cc.findLegalFFTDimension(smaller) == smaller)
            return smaller;
    }
    // Nothing below size qualified, so hand the input back untouched.
    return size;
}
/**
 * Step each grid dimension down `level` times using
 * findPreviousLegalFFTDimension().
 *
 * @param cc     context passed through to findPreviousLegalFFTDimension()
 * @param level  number of downward steps; zero or less leaves the sizes as-is
 * @param xsize  grid size along x, updated in place
 * @param ysize  grid size along y, updated in place
 * @param zsize  grid size along z, updated in place
 *
 * NOTE(review): no minimum grid size is enforced here; with a high level a
 * small grid could presumably shrink very far -- confirm callers tolerate it.
 */
static void applyHipPmeFastGrid(ComputeContext& cc, int level, int& xsize, int& ysize, int& zsize) {
    int* const dims[3] = {&xsize, &ysize, &zsize};
    for (int remaining = level; remaining > 0; --remaining)
        for (int axis = 0; axis < 3; ++axis)
            *dims[axis] = findPreviousLegalFFTDimension(cc, *dims[axis]);
}
}
class CommonCalcNonbondedForceKernel::ForceInfo : public ComputeForceInfo { class CommonCalcNonbondedForceKernel::ForceInfo : public ComputeForceInfo {
public: public:
ForceInfo(const NonbondedForce& force) : force(force) { ForceInfo(const NonbondedForce& force) : force(force) {
...@@ -292,6 +332,7 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons ...@@ -292,6 +332,7 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons
pmeGridIndexBlockSize = useLargeHipPmeBlocks ? 128 : -1; pmeGridIndexBlockSize = useLargeHipPmeBlocks ? 128 : -1;
pmeSpreadChargeBlockSize = useLargeHipPmeBlocks ? 128 : -1; pmeSpreadChargeBlockSize = useLargeHipPmeBlocks ? 128 : -1;
pmeFinishSpreadChargeBlockSize = isHip ? 128 : -1; pmeFinishSpreadChargeBlockSize = isHip ? 128 : -1;
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
pmeDispersionSpreadWaveSize = 64; pmeDispersionSpreadWaveSize = 64;
pmeDispersionSpreadBlockSize = 256; pmeDispersionSpreadBlockSize = 256;
pmeDispersionAtomsPerWave = pmeDispersionSpreadWaveSize/PmeOrder; pmeDispersionAtomsPerWave = pmeDispersionSpreadWaveSize/PmeOrder;
...@@ -382,16 +423,27 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons ...@@ -382,16 +423,27 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) { else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters. // Compute the PME parameters.
double requestedAlpha;
int requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ;
force.getPMEParameters(requestedAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(requestedAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = cc.findLegalFFTDimension(gridSizeX); gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY); gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ); gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
if (doLJPME) { if (doLJPME) {
double requestedDispersionAlpha;
force.getLJPMEParameters(requestedDispersionAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
bool useAutomaticDispersionGrid = usesAutomaticPmeGrid(requestedDispersionAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX, NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true); dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX); dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY); dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ); dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ);
if (useAutomaticDispersionGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ);
} }
defines["EWALD_ALPHA"] = cc.doubleToString(alpha); defines["EWALD_ALPHA"] = cc.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI));
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstdlib>
#ifdef _MSC_VER #ifdef _MSC_VER
#include <windows.h> #include <windows.h>
#endif #endif
...@@ -48,6 +49,8 @@ ...@@ -48,6 +49,8 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
// Largest value getHipPmeFastGridLevel() returns; larger requested levels are clamped to it.
static const int MaxHipPmeFastGridLevel = 32;
static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int index) { static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int index) {
Vec3 a, b, c; Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c); cc.getPeriodicBoxVectors(a, b, c);
...@@ -67,6 +70,40 @@ static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int ind ...@@ -67,6 +70,40 @@ static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int ind
} }
} }
/**
 * Read the experimental fast-grid level from the OPENMM_HIP_PME_FAST_GRID_LEVEL
 * environment variable.
 *
 * @param isHip  when false the environment variable is ignored entirely
 * @return 0 if isHip is false, the variable is unset, or its value is not a
 *         positive base-10 integer (optionally followed by whitespace);
 *         otherwise the parsed value clamped to MaxHipPmeFastGridLevel
 */
static int getHipPmeFastGridLevel(bool isHip) {
    if (!isHip)
        return 0;
    const char* value = getenv("OPENMM_HIP_PME_FAST_GRID_LEVEL");
    if (value == NULL)
        return 0;
    char* end = NULL;
    long level = strtol(value, &end, 10);
    // end == value means strtol() consumed no digits at all.
    if (end == value || level <= 0)
        return 0;
    // strtol() stops at the first character it cannot parse.  Tolerate trailing
    // whitespace, but treat anything else ("3abc", "1.5") as malformed instead
    // of silently using the numeric prefix.
    while (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')
        end++;
    if (*end != '\0')
        return 0;
    return static_cast<int>(std::min(level, static_cast<long>(MaxHipPmeFastGridLevel)));
}
/**
 * Tell whether a set of requested PME parameters is entirely unset.
 *
 * @return true only when alpha and all three grid sizes are zero; any nonzero
 *         value makes the request count as explicit
 */
static bool usesAutomaticPmeGrid(double alpha, int xsize, int ysize, int zsize) {
    if (alpha != 0.0)
        return false;
    if (xsize != 0 || ysize != 0 || zsize != 0)
        return false;
    return true;
}
/**
 * Search downward from size-1 for the nearest value that
 * cc.findLegalFFTDimension() returns unchanged.
 *
 * @param cc    context whose findLegalFFTDimension() is queried
 * @param size  starting dimension; the search begins strictly below it
 * @return the first such value found, or size itself if no positive value
 *         below it qualifies
 */
static int findPreviousLegalFFTDimension(ComputeContext& cc, int size) {
    int smaller = size;
    while (--smaller > 0) {
        if (cc.findLegalFFTDimension(smaller) == smaller)
            return smaller;
    }
    // Nothing below size qualified, so hand the input back untouched.
    return size;
}
/**
 * Step each grid dimension down `level` times using
 * findPreviousLegalFFTDimension().
 *
 * @param cc     context passed through to findPreviousLegalFFTDimension()
 * @param level  number of downward steps; zero or less leaves the sizes as-is
 * @param xsize  grid size along x, updated in place
 * @param ysize  grid size along y, updated in place
 * @param zsize  grid size along z, updated in place
 *
 * NOTE(review): no minimum grid size is enforced here; with a high level a
 * small grid could presumably shrink very far -- confirm callers tolerate it.
 */
static void applyHipPmeFastGrid(ComputeContext& cc, int level, int& xsize, int& ysize, int& zsize) {
    int* const dims[3] = {&xsize, &ysize, &zsize};
    for (int remaining = level; remaining > 0; --remaining)
        for (int axis = 0; axis < 3; ++axis)
            *dims[axis] = findPreviousLegalFFTDimension(cc, *dims[axis]);
}
/* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
* AmoebaTorsionTorsion * * AmoebaTorsionTorsion *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
...@@ -427,12 +464,15 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons ...@@ -427,12 +464,15 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons
double sum = 0; double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++) for (int j = i; j < maxExtrapolationOrder; j++)
sum += force.getExtrapolationCoefficients()[j]; sum += force.getExtrapolationCoefficients()[j];
coefficients << cc.doubleToString(sum); coefficients << cc.doubleToString(sum);
} }
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str(); defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
bool isHip = (getPlatform().getName() == "HIP");
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
if (usePME) { if (usePME) {
int nx, ny, nz; int nx, ny, nz;
force.getPMEParameters(pmeAlpha, nx, ny, nz); force.getPMEParameters(pmeAlpha, nx, ny, nz);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(pmeAlpha, nx, ny, nz);
if (nx == 0 || pmeAlpha == 0) { if (nx == 0 || pmeAlpha == 0) {
NonbondedForce nb; NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance()); nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
...@@ -441,6 +481,8 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons ...@@ -441,6 +481,8 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons
gridSizeX = cc.findLegalFFTDimension(gridSizeX); gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY); gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ); gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
} }
else { else {
gridSizeX = cc.findLegalFFTDimension(nx); gridSizeX = cc.findLegalFFTDimension(nx);
...@@ -2573,9 +2615,12 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const ...@@ -2573,9 +2615,12 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
} }
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str(); defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
cutoff = force.getCutoffDistance(); cutoff = force.getCutoffDistance();
bool isHip = (getPlatform().getName() == "HIP");
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
if (usePME) { if (usePME) {
int nx, ny, nz; int nx, ny, nz;
force.getPMEParameters(pmeAlpha, nx, ny, nz); force.getPMEParameters(pmeAlpha, nx, ny, nz);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(pmeAlpha, nx, ny, nz);
if (nx == 0 || pmeAlpha == 0) { if (nx == 0 || pmeAlpha == 0) {
NonbondedForce nb; NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance()); nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
...@@ -2584,6 +2629,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const ...@@ -2584,6 +2629,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
gridSizeX = cc.findLegalFFTDimension(gridSizeX); gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY); gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ); gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
} }
else { else {
gridSizeX = cc.findLegalFFTDimension(nx); gridSizeX = cc.findLegalFFTDimension(nx);
...@@ -2591,6 +2638,7 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const ...@@ -2591,6 +2638,7 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
gridSizeZ = cc.findLegalFFTDimension(nz); gridSizeZ = cc.findLegalFFTDimension(nz);
} }
force.getDPMEParameters(dpmeAlpha, nx, ny, nz); force.getDPMEParameters(dpmeAlpha, nx, ny, nz);
bool useAutomaticDispersionGrid = usesAutomaticPmeGrid(dpmeAlpha, nx, ny, nz);
if (nx == 0 || dpmeAlpha == 0) { if (nx == 0 || dpmeAlpha == 0) {
NonbondedForce nb; NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance()); nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
...@@ -2599,6 +2647,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const ...@@ -2599,6 +2647,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX); dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY); dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ); dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ);
if (useAutomaticDispersionGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ);
} }
else { else {
dispersionGridSizeX = cc.findLegalFFTDimension(nx); dispersionGridSizeX = cc.findLegalFFTDimension(nx);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment