Commit f0c2e89c authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimization: use mapped memory to communicate when CCMA is converged

parent a42431fd
...@@ -99,8 +99,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -99,8 +99,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL),
random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL), random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConvergedMemory(NULL),
ccmaConvergedMemory(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL) {
// Create workspace arrays. // Create workspace arrays.
...@@ -466,8 +466,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -466,8 +466,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaAtoms = CudaArray::create<int2>(context, numCCMA, "CcmaAtoms"); ccmaAtoms = CudaArray::create<int2>(context, numCCMA, "CcmaAtoms");
ccmaAtomConstraints = CudaArray::create<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaAtomConstraints = CudaArray::create<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = CudaArray::create<int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints = CudaArray::create<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConverged = CudaArray::create<int>(context, 2, "CcmaConverged"); CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, 2*sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory");
CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, 2*sizeof(int), 0), "Error allocating pinned memory"); CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
ccmaConstraintMatrixColumn = CudaArray::create<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = CudaArray::create<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
vector<int2> atomsVec(ccmaAtoms->getSize()); vector<int2> atomsVec(ccmaAtoms->getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize());
...@@ -680,8 +680,6 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() { ...@@ -680,8 +680,6 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete ccmaDelta1; delete ccmaDelta1;
if (ccmaDelta2 != NULL) if (ccmaDelta2 != NULL)
delete ccmaDelta2; delete ccmaDelta2;
if (ccmaConverged != NULL)
delete ccmaConverged;
if (ccmaConvergedMemory != NULL) if (ccmaConvergedMemory != NULL)
cuMemFreeHost(ccmaConvergedMemory); cuMemFreeHost(ccmaConvergedMemory);
if (vsite2AvgAtoms != NULL) if (vsite2AvgAtoms != NULL)
...@@ -739,26 +737,23 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double ...@@ -739,26 +737,23 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
int i; int i;
void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(),
constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(), constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
&ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConverged->getDevicePointer(), &ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConvergedDeviceMemory,
&floatTol, &i}; &floatTol, &i};
void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(), void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
&ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConverged->getDevicePointer(), &i}; &ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConvergedDeviceMemory, &i};
void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(), void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(),
constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(), constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
&context.getVelm().getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(), &context.getVelm().getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
&ccmaConverged->getDevicePointer(), &i}; &ccmaConvergedDeviceMemory, &i};
const int checkInterval = 4; const int checkInterval = 4;
for (i = 0; i < 150; i++) { for (i = 0; i < 150; i++) {
if (i == 0) { if (i == 0) {
ccmaConvergedMemory[0] = 1; ccmaConvergedMemory[0] = 1;
ccmaConvergedMemory[1] = 0; ccmaConvergedMemory[1] = 0;
cuMemcpyHtoD(ccmaConverged->getDevicePointer(), ccmaConvergedMemory, 2*sizeof(int));
} }
context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms->getSize());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0)
cuMemcpyDtoH(ccmaConvergedMemory, ccmaConverged->getDevicePointer(), 2*sizeof(int));
CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA"); CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
}
context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms->getSize());
context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
......
...@@ -133,8 +133,8 @@ private: ...@@ -133,8 +133,8 @@ private:
CudaArray* ccmaConstraintMatrixValue; CudaArray* ccmaConstraintMatrixValue;
CudaArray* ccmaDelta1; CudaArray* ccmaDelta1;
CudaArray* ccmaDelta2; CudaArray* ccmaDelta2;
CudaArray* ccmaConverged;
int* ccmaConvergedMemory; int* ccmaConvergedMemory;
CUdeviceptr ccmaConvergedDeviceMemory;
CUevent ccmaEvent; CUevent ccmaEvent;
CudaArray* vsite2AvgAtoms; CudaArray* vsite2AvgAtoms;
CudaArray* vsite2AvgWeights; CudaArray* vsite2AvgWeights;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment