Commit 348a03e6 authored by one
Browse files

Add experimental HIP PME fast grid level

Introduce OPENMM_HIP_PME_FAST_GRID_LEVEL as a HIP-only experimental
control for reducing automatically selected PME grid dimensions.  A
positive level repeatedly steps each grid dimension down to the previous
legal FFT size, while level 0 preserves the existing OpenMM behavior.

Keep explicit user-provided PME grid parameters unchanged so the feature
only affects automatic grid selection.  This is intended for performance
and accuracy exploration, not as a default tuning policy.
parent 97239ca6
......@@ -34,12 +34,52 @@
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <cstdlib>
#include <iterator>
#include <set>
using namespace OpenMM;
using namespace std;
namespace {
// Largest fast grid level honored; larger requests are clamped to this value.
static const int MaxHipPmeFastGridLevel = 32;

/**
 * Read the experimental PME fast grid level from the environment variable
 * OPENMM_HIP_PME_FAST_GRID_LEVEL.
 *
 * @param isHip  whether the current platform is HIP; when false the
 *               environment is not consulted at all
 * @return 0 when isHip is false, the variable is unset, it does not start
 *         with a base-10 integer, or the parsed value is not positive;
 *         otherwise the parsed value clamped to MaxHipPmeFastGridLevel
 */
static int getHipPmeFastGridLevel(bool isHip) {
    const char* text = isHip ? getenv("OPENMM_HIP_PME_FAST_GRID_LEVEL") : NULL;
    if (text == NULL)
        return 0;

    // strtol leaves parseEnd == text when no digits could be consumed.
    char* parseEnd = NULL;
    const long parsed = strtol(text, &parseEnd, 10);
    const bool noDigits = (parseEnd == text);
    if (noDigits || parsed < 1)
        return 0;

    if (parsed > MaxHipPmeFastGridLevel)
        return MaxHipPmeFastGridLevel;
    return (int) parsed;
}
/**
 * Report whether a set of requested PME parameters leaves grid selection
 * to be done automatically, i.e. alpha and all three grid sizes are zero.
 *
 * @param alpha  requested Ewald alpha
 * @param xsize  requested grid size along x
 * @param ysize  requested grid size along y
 * @param zsize  requested grid size along z
 * @return true only when every argument compares equal to zero
 */
static bool usesAutomaticPmeGrid(double alpha, int xsize, int ysize, int zsize) {
    // Any non-zero value means at least part of the request was explicit.
    if (alpha != 0.0)
        return false;
    if (xsize != 0 || ysize != 0 || zsize != 0)
        return false;
    return true;
}
/**
 * Find the largest size strictly below `size` that the context reports as
 * a legal FFT dimension (one that cc.findLegalFFTDimension() maps to itself).
 *
 * @param cc    context whose findLegalFFTDimension() is queried
 * @param size  current grid dimension
 * @return the nearest smaller legal dimension, or `size` unchanged when no
 *         positive smaller candidate is legal
 */
static int findPreviousLegalFFTDimension(ComputeContext& cc, int size) {
    int candidate = size;
    // Walk downward from size-1 to 1, stopping at the first legal size.
    while (--candidate > 0) {
        const bool isLegal = (cc.findLegalFFTDimension(candidate) == candidate);
        if (isLegal)
            return candidate;
    }
    return size;
}
/**
 * Shrink the three grid dimensions in place by stepping each one down to
 * its previous legal FFT size, `level` times.
 *
 * @param cc     context used to test FFT size legality
 * @param level  number of downward steps per dimension; zero or a negative
 *               value leaves the sizes untouched
 * @param xsize  grid size along x, updated in place
 * @param ysize  grid size along y, updated in place
 * @param zsize  grid size along z, updated in place
 *
 * NOTE(review): nothing here enforces a minimum grid size; confirm that
 * callers tolerate the small grids a high level can produce.
 */
static void applyHipPmeFastGrid(ComputeContext& cc, int level, int& xsize, int& ysize, int& zsize) {
    int* const axes[3] = {&xsize, &ysize, &zsize};
    for (int remaining = level; remaining > 0; --remaining) {
        // One step: x, then y, then z each move to their previous legal size.
        for (int axis = 0; axis < 3; ++axis)
            *axes[axis] = findPreviousLegalFFTDimension(cc, *axes[axis]);
    }
}
}
class CommonCalcNonbondedForceKernel::ForceInfo : public ComputeForceInfo {
public:
ForceInfo(const NonbondedForce& force) : force(force) {
......@@ -292,6 +332,7 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons
pmeGridIndexBlockSize = useLargeHipPmeBlocks ? 128 : -1;
pmeSpreadChargeBlockSize = useLargeHipPmeBlocks ? 128 : -1;
pmeFinishSpreadChargeBlockSize = isHip ? 128 : -1;
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
pmeDispersionSpreadWaveSize = 64;
pmeDispersionSpreadBlockSize = 256;
pmeDispersionAtomsPerWave = pmeDispersionSpreadWaveSize/PmeOrder;
......@@ -382,16 +423,27 @@ void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, cons
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
double requestedAlpha;
int requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ;
force.getPMEParameters(requestedAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(requestedAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
if (doLJPME) {
double requestedDispersionAlpha;
force.getLJPMEParameters(requestedDispersionAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
bool useAutomaticDispersionGrid = usesAutomaticPmeGrid(requestedDispersionAlpha, requestedGridSizeX, requestedGridSizeY, requestedGridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ);
if (useAutomaticDispersionGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cc.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI));
......
......@@ -41,6 +41,7 @@
#include <algorithm>
#include <cmath>
#include <cstdlib>
#ifdef _MSC_VER
#include <windows.h>
#endif
......@@ -48,6 +49,8 @@
using namespace OpenMM;
using namespace std;
// Largest value honored for OPENMM_HIP_PME_FAST_GRID_LEVEL; getHipPmeFastGridLevel() clamps larger requests to this.
static const int MaxHipPmeFastGridLevel = 32;
static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int index) {
Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c);
......@@ -67,6 +70,40 @@ static void setPeriodicBoxArgs(ComputeContext& cc, ComputeKernel kernel, int ind
}
}
/**
 * Read the experimental PME fast grid level from the environment variable
 * OPENMM_HIP_PME_FAST_GRID_LEVEL.
 *
 * @param isHip  whether the current platform is HIP; when false the
 *               environment is not consulted at all
 * @return 0 when isHip is false, the variable is unset, it does not start
 *         with a base-10 integer, or the parsed value is not positive;
 *         otherwise the parsed value clamped to MaxHipPmeFastGridLevel
 */
static int getHipPmeFastGridLevel(bool isHip) {
    if (isHip) {
        const char* raw = getenv("OPENMM_HIP_PME_FAST_GRID_LEVEL");
        if (raw != NULL) {
            // strtol leaves stop == raw when no digits could be consumed.
            char* stop = NULL;
            const long requested = strtol(raw, &stop, 10);
            if (stop != raw && requested > 0)
                return (requested < MaxHipPmeFastGridLevel) ? (int) requested : MaxHipPmeFastGridLevel;
        }
    }
    return 0;
}
/**
 * Report whether a set of requested PME parameters leaves grid selection
 * to be done automatically, i.e. alpha and all three grid sizes are zero.
 *
 * @param alpha  requested Ewald alpha
 * @param xsize  requested grid size along x
 * @param ysize  requested grid size along y
 * @param zsize  requested grid size along z
 * @return true only when every argument compares equal to zero
 */
static bool usesAutomaticPmeGrid(double alpha, int xsize, int ysize, int zsize) {
    const bool alphaUnset = (alpha == 0.0);
    const bool gridUnset = !(xsize != 0 || ysize != 0 || zsize != 0);
    return alphaUnset && gridUnset;
}
/**
 * Find the largest size strictly below `size` that the context reports as
 * a legal FFT dimension (one that cc.findLegalFFTDimension() maps to itself).
 *
 * @param cc    context whose findLegalFFTDimension() is queried
 * @param size  current grid dimension
 * @return the nearest smaller legal dimension, or `size` unchanged when no
 *         positive smaller candidate is legal
 */
static int findPreviousLegalFFTDimension(ComputeContext& cc, int size) {
    int smaller = size - 1;
    // Scan downward through the positive sizes; the first fixed point wins.
    while (smaller > 0) {
        if (cc.findLegalFFTDimension(smaller) == smaller)
            return smaller;
        smaller--;
    }
    return size;
}
/**
 * Shrink the three grid dimensions in place by stepping each one down to
 * its previous legal FFT size, `level` times.
 *
 * @param cc     context used to test FFT size legality
 * @param level  number of downward steps per dimension; zero or a negative
 *               value leaves the sizes untouched
 * @param xsize  grid size along x, updated in place
 * @param ysize  grid size along y, updated in place
 * @param zsize  grid size along z, updated in place
 *
 * NOTE(review): nothing here enforces a minimum grid size; confirm that
 * callers tolerate the small grids a high level can produce.
 */
static void applyHipPmeFastGrid(ComputeContext& cc, int level, int& xsize, int& ysize, int& zsize) {
    int stepsLeft = level;
    while (stepsLeft > 0) {
        // One step: x, then y, then z each move to their previous legal size.
        xsize = findPreviousLegalFFTDimension(cc, xsize);
        ysize = findPreviousLegalFFTDimension(cc, ysize);
        zsize = findPreviousLegalFFTDimension(cc, zsize);
        --stepsLeft;
    }
}
/* -------------------------------------------------------------------------- *
* AmoebaTorsionTorsion *
* -------------------------------------------------------------------------- */
......@@ -427,12 +464,15 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons
double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++)
sum += force.getExtrapolationCoefficients()[j];
coefficients << cc.doubleToString(sum);
coefficients << cc.doubleToString(sum);
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
bool isHip = (getPlatform().getName() == "HIP");
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
if (usePME) {
int nx, ny, nz;
force.getPMEParameters(pmeAlpha, nx, ny, nz);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(pmeAlpha, nx, ny, nz);
if (nx == 0 || pmeAlpha == 0) {
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
......@@ -441,6 +481,8 @@ void CommonCalcAmoebaMultipoleForceKernel::initialize(const System& system, cons
gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
}
else {
gridSizeX = cc.findLegalFFTDimension(nx);
......@@ -2573,9 +2615,12 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
cutoff = force.getCutoffDistance();
bool isHip = (getPlatform().getName() == "HIP");
int hipPmeFastGridLevel = getHipPmeFastGridLevel(isHip);
if (usePME) {
int nx, ny, nz;
force.getPMEParameters(pmeAlpha, nx, ny, nz);
bool useAutomaticPmeGrid = usesAutomaticPmeGrid(pmeAlpha, nx, ny, nz);
if (nx == 0 || pmeAlpha == 0) {
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
......@@ -2584,6 +2629,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (useAutomaticPmeGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, gridSizeX, gridSizeY, gridSizeZ);
}
else {
gridSizeX = cc.findLegalFFTDimension(nx);
......@@ -2591,6 +2638,7 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
gridSizeZ = cc.findLegalFFTDimension(nz);
}
force.getDPMEParameters(dpmeAlpha, nx, ny, nz);
bool useAutomaticDispersionGrid = usesAutomaticPmeGrid(dpmeAlpha, nx, ny, nz);
if (nx == 0 || dpmeAlpha == 0) {
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
......@@ -2599,6 +2647,8 @@ void CommonCalcHippoNonbondedForceKernel::initialize(const System& system, const
dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ);
if (useAutomaticDispersionGrid)
applyHipPmeFastGrid(cc, hipPmeFastGridLevel, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ);
}
else {
dispersionGridSizeX = cc.findLegalFFTDimension(nx);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment