Commit 59c809c0 authored by peastman's avatar peastman
Browse files

Began overhaul of CUDA CustomIntegrator in preparation for supporting flow control

parent 44b96f0c
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014 Stanford University and the Authors. * * Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "windowsExportCpu.h" #include "windowsExport.h"
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -42,7 +42,7 @@ namespace OpenMM { ...@@ -42,7 +42,7 @@ namespace OpenMM {
/** /**
* This class simplifies the management of a set of related CompiledExpressions that share variables. * This class simplifies the management of a set of related CompiledExpressions that share variables.
*/ */
class OPENMM_EXPORT_CPU CompiledExpressionSet { class OPENMM_EXPORT CompiledExpressionSet {
public: public:
CompiledExpressionSet(); CompiledExpressionSet();
/** /**
...@@ -60,6 +60,10 @@ public: ...@@ -60,6 +60,10 @@ public:
* @param value the value to set it to * @param value the value to set it to
*/ */
void setVariable(int index, double value); void setVariable(int index, double value);
/**
* Get the total number of variables for which indices have been allocated.
*/
int getNumVariables() const;
private: private:
std::vector<Lepton::CompiledExpression*> expressions; std::vector<Lepton::CompiledExpression*> expressions;
std::vector<std::string> variables; std::vector<std::string> variables;
......
/* Portions copyright (c) 2014 Stanford University and Simbios. /* Portions copyright (c) 2014-2015 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
using namespace OpenMM; using namespace OpenMM;
using namespace Lepton; using namespace Lepton;
...@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) { ...@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) {
for (int i = 0; i < (int) variableReferences[index].size(); i++) for (int i = 0; i < (int) variableReferences[index].size(); i++)
*variableReferences[index][i] = value; *variableReferences[index][i] = value;
} }
/**
 * Report how many variables currently have an index allocated in this set,
 * i.e. the number of entries in the internal variable name list.
 */
int CompiledExpressionSet::getNumVariables() const {
    // The container reports an unsigned size; convert explicitly to the
    // signed count this method is declared to return.
    const int count = static_cast<int>(variables.size());
    return count;
}
...@@ -25,10 +25,10 @@ ...@@ -25,10 +25,10 @@
#ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__ #ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__
#define OPENMM_CPU_CUSTOM_GB_FORCE_H__ #define OPENMM_CPU_CUSTOM_GB_FORCE_H__
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "openmm/CustomGBForce.h" #include "openmm/CustomGBForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include <map> #include <map>
......
...@@ -27,9 +27,9 @@ ...@@ -27,9 +27,9 @@
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "ReferenceBondIxn.h" #include "ReferenceBondIxn.h"
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "openmm/CustomManyParticleForce.h" #include "openmm/CustomManyParticleForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
......
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#include "CudaSort.h" #include "CudaSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include <cufft.h> #include <cufft.h>
namespace OpenMM { namespace OpenMM {
...@@ -1213,6 +1216,7 @@ private: ...@@ -1213,6 +1216,7 @@ private:
*/ */
class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel { class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu), CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), potentialEnergy(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), potentialEnergy(NULL),
kineticEnergy(NULL), uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) { kineticEnergy(NULL), uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
...@@ -1279,15 +1283,17 @@ public: ...@@ -1279,15 +1283,17 @@ public:
void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values); void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
private: private:
class ReorderListener; class ReorderListener;
class GlobalTarget;
std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName); std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
CudaContext& cu; CudaContext& cu;
double prevStepSize, energy; double prevStepSize, energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
CudaArray* globalValues; CudaArray* globalValues;
CudaArray* contextParameterValues; CudaArray* contextParameterValues;
...@@ -1303,19 +1309,43 @@ private: ...@@ -1303,19 +1309,43 @@ private:
mutable std::vector<std::vector<double> > localPerDofValuesDouble; mutable std::vector<std::vector<double> > localPerDofValuesDouble;
std::vector<float> contextValuesFloat; std::vector<float> contextValuesFloat;
std::vector<double> contextValuesDouble; std::vector<double> contextValuesDouble;
std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables;
std::vector<std::vector<CUfunction> > kernels; std::vector<std::vector<CUfunction> > kernels;
std::vector<std::vector<std::vector<void*> > > kernelArgs; std::vector<std::vector<std::vector<void*> > > kernelArgs;
std::vector<void*> kineticEnergyArgs; std::vector<void*> kineticEnergyArgs;
CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel; CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<CustomIntegratorUtilities::Comparison> comparisons;
std::vector<std::vector<Lepton::CompiledExpression> > globalExpressions;
CompiledExpressionSet expressionSet;
std::vector<bool> needsGlobals;
std::vector<bool> needsForces; std::vector<bool> needsForces;
std::vector<bool> needsEnergy; std::vector<bool> needsEnergy;
std::vector<bool> computeBothForceAndEnergy;
std::vector<bool> invalidatesForces; std::vector<bool> invalidatesForces;
std::vector<bool> merged; std::vector<bool> merged;
std::vector<int> forceGroup; std::vector<int> forceGroupFlags;
std::vector<int> blockEnd;
std::vector<int> requiredGaussian; std::vector<int> requiredGaussian;
std::vector<int> requiredUniform; std::vector<int> requiredUniform;
std::vector<int> stepEnergyVariableIndex;
std::vector<int> globalVariableIndex;
std::vector<int> parameterVariableIndex;
int gaussianVariableIndex, uniformVariableIndex, dtVariableIndex;
std::vector<std::string> parameterNames; std::vector<std::string> parameterNames;
std::vector<GlobalTarget> stepTarget;
};
/**
 * Pairs a GlobalTargetType (DT, VARIABLE, or PARAMETER) with an integer index.
 * Instances are passed to recordGlobalValue() and stored in the stepTarget vector.
 */
class CudaIntegrateCustomStepKernel::GlobalTarget {
public:
    CudaIntegrateCustomStepKernel::GlobalTargetType type;
    int variableIndex;
    /**
     * Create a placeholder target. Both members are given deterministic values
     * (type DT, index -1) so that a default-constructed element, such as one
     * produced by resizing a vector of targets, never holds indeterminate data.
     */
    GlobalTarget() : type(CudaIntegrateCustomStepKernel::DT), variableIndex(-1) {
    }
    /**
     * Create a target of the given type.
     *
     * @param type           which kind of global quantity this target refers to
     * @param variableIndex  the index associated with the target; how it is
     *                       interpreted for each type is decided by the code that
     *                       consumes the target, which is not visible here
     */
    GlobalTarget(CudaIntegrateCustomStepKernel::GlobalTargetType type, int variableIndex) : type(type), variableIndex(variableIndex) {
    }
};
/** /**
......
This diff is collapsed.
...@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer, ...@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer,
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[SUM_OUTPUT_INDEX] = tempBuffer[0]; *result = tempBuffer[0];
} }
extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer, double* result) { extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer, double* result) {
...@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer ...@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[SUM_OUTPUT_INDEX] = tempBuffer[0]; *result = tempBuffer[0];
} }
extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta) { extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta) {
......
...@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co ...@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed4 xpj2 = make_mixed4(0); mixed4 xpj2 = make_mixed4(0);
float invMassCentral = params.x; float invMassCentral = params.x;
float avgMass = params.y; float avgMass = params.y;
float d2 = params.z;
float invMassPeripheral = params.w; float invMassPeripheral = params.w;
if (atoms.z != -1) { if (atoms.z != -1) {
pos2 = loadPos(oldPos, posCorrection, atoms.z); pos2 = loadPos(oldPos, posCorrection, atoms.z);
...@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co ...@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z; mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z; mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z; mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
mixed ld1 = d2-rij1sq;
mixed ld2 = d2-rij2sq;
mixed ld3 = d2-rij3sq;
// Iterate until convergence. // Iterate until convergence.
...@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest ...@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest
if (threadIdx.x == 0) if (threadIdx.x == 0)
groupConverged = 1; groupConverged = 1;
__syncthreads(); __syncthreads();
mixed lowerTol = 1-2*tol+tol*tol;
mixed upperTol = 1+2*tol+tol*tol;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CCMA_CONSTRAINTS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CCMA_CONSTRAINTS; index += blockDim.x*gridDim.x) {
// Compute the force due to this constraint. // Compute the force due to this constraint.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment