Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f4dc3110
"csrc/vscode:/vscode.git/clone" did not exist on "e83a41198e13e557eb5e2628caa13a9badd8c816"
Commit
f4dc3110
authored
Jan 24, 2018
by
Peter Eastman
Browse files
CUDA implementation of RMSDForce
parent
2855d12e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
378 additions
and
0 deletions
+378
-0
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+53
-0
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+2
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+195
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+1
-0
platforms/cuda/src/kernels/rmsd.cu
platforms/cuda/src/kernels/rmsd.cu
+91
-0
platforms/cuda/tests/TestCudaRMSDForce.cpp
platforms/cuda/tests/TestCudaRMSDForce.cpp
+36
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
f4dc3110
...
@@ -1278,6 +1278,59 @@ private:
...
@@ -1278,6 +1278,59 @@ private:
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
};
/**
* This kernel is invoked by RMSDForce to calculate the forces acting on the system and the energy of the system.
*/
class
CudaCalcRMSDForceKernel
:
public
CalcRMSDForceKernel
{
public:
CudaCalcRMSDForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CalcRMSDForceKernel
(
name
,
platform
),
cu
(
cu
),
referencePos
(
NULL
),
particles
(
NULL
),
buffer
(
NULL
)
{
}
~
CudaCalcRMSDForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the RMSDForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
RMSDForce
&
force
);
/**
* Record the reference positions and particle indices.
*/
void
recordParameters
(
const
RMSDForce
&
force
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* This is the internal implementation of execute(), templatized on whether we're
* using single or double precision.
*/
template
<
class
REAL
>
double
executeImpl
(
ContextImpl
&
context
);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the RMSDForce to copy the parameters from
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
RMSDForce
&
force
);
private:
class
ForceInfo
;
CudaContext
&
cu
;
ForceInfo
*
info
;
double
sumNormRef
;
CudaArray
*
referencePos
;
CudaArray
*
particles
;
CudaArray
*
buffer
;
CUfunction
kernel1
,
kernel2
;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
*/
*/
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
f4dc3110
...
@@ -110,6 +110,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -110,6 +110,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
return
new
CudaCalcCustomCVForceKernel
(
name
,
platform
,
cu
);
return
new
CudaCalcCustomCVForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcRMSDForceKernel
::
Name
())
return
new
CudaCalcRMSDForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
CudaCalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
f4dc3110
...
@@ -51,6 +51,7 @@
...
@@ -51,6 +51,7 @@
#include "ReferenceTabulatedFunction.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include "jama_eig.h"
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
#include <set>
#include <set>
...
@@ -6784,6 +6785,200 @@ void CudaCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& i
...
@@ -6784,6 +6785,200 @@ void CudaCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& i
innerContext.setParameter(param.first, context.getParameter(param.first));
innerContext.setParameter(param.first, context.getParameter(param.first));
}
}
class CudaCalcRMSDForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const RMSDForce& force) : force(force) {
updateParticles();
}
void updateParticles() {
particles.clear();
for (int i : force.getParticles())
particles.insert(i);
}
bool areParticlesIdentical(int particle1, int particle2) {
bool include1 = (particles.find(particle1) != particles.end());
bool include2 = (particles.find(particle2) != particles.end());
return (include1 == include2);
}
private:
const RMSDForce& force;
set<int> particles;
};
CudaCalcRMSDForceKernel::~CudaCalcRMSDForceKernel() {
if (referencePos != NULL)
delete referencePos;
if (particles != NULL)
delete particles;
if (buffer != NULL)
delete buffer;
}
void CudaCalcRMSDForceKernel::initialize(const System& system, const RMSDForce& force) {
// Create data structures.
bool useDouble = cu.getUseDoublePrecision();
int elementSize = (useDouble ? sizeof(double) : sizeof(float));
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = system.getNumParticles();
referencePos = new CudaArray(cu, system.getNumParticles(), 4*elementSize, "referencePos");
particles = CudaArray::create<int>(cu, numParticles, "particles");
buffer = new CudaArray(cu, 13, elementSize, "buffer");
recordParameters(force);
info = new ForceInfo(force);
cu.addForce(info);
// Create the kernels.
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::rmsd);
kernel1 = cu.getKernel(module, "computeRMSDPart1");
kernel2 = cu.getKernel(module, "computeRMSDForces");
}
void CudaCalcRMSDForceKernel::recordParameters(const RMSDForce& force) {
// Record the parameters and center the reference positions.
vector<int> particleVec = force.getParticles();
if (particleVec.size() == 0)
for (int i = 0; i < cu.getNumAtoms(); i++)
particleVec.push_back(i);
vector<Vec3> centeredPositions = force.getReferencePositions();
Vec3 center;
for (int i : particleVec)
center += centeredPositions[i];
center /= particleVec.size();
for (Vec3& p : centeredPositions)
p -= center;
// Upload them to the device.
particles->upload(particleVec);
if (cu.getUseDoublePrecision()) {
vector<double4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(make_double4(p[0], p[1], p[2], 0));
referencePos->upload(pos);
}
else {
vector<float4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(make_float4(p[0], p[1], p[2], 0));
referencePos->upload(pos);
}
// Record the sum of the norms of the reference positions.
sumNormRef = 0.0;
for (int i : particleVec) {
Vec3 p = centeredPositions[i];
sumNormRef += p.dot(p);
}
}
double CudaCalcRMSDForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (cu.getUseDoublePrecision())
return executeImpl<double>(context);
return executeImpl<float>(context);
}
template <class REAL>
double CudaCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel.
int numParticles = particles->getSize();
int blockSize = 128;
void* args1[] = {&numParticles, &cu.getPosq().getDevicePointer(), &referencePos->getDevicePointer(),
&particles->getDevicePointer(), &buffer->getDevicePointer()};
cu.executeKernel(kernel1, args1, blockSize, blockSize, blockSize*sizeof(REAL));
// Download the results, build the F matrix, and find the maximum eigenvalue
// and eigenvector.
vector<REAL> b;
buffer->download(b);
Array2D<double> F(4, 4);
F[0][0] = b[0*3+0] + b[1*3+1] + b[2*3+2];
F[1][0] = b[1*3+2] - b[2*3+1];
F[2][0] = b[2*3+0] - b[0*3+2];
F[3][0] = b[0*3+1] - b[1*3+0];
F[0][1] = b[1*3+2] - b[2*3+1];
F[1][1] = b[0*3+0] - b[1*3+1] - b[2*3+2];
F[2][1] = b[0*3+1] + b[1*3+0];
F[3][1] = b[0*3+2] + b[2*3+0];
F[0][2] = b[2*3+0] - b[0*3+2];
F[1][2] = b[0*3+1] + b[1*3+0];
F[2][2] = -b[0*3+0] + b[1*3+1] - b[2*3+2];
F[3][2] = b[1*3+2] + b[2*3+1];
F[0][3] = b[0*3+1] - b[1*3+0];
F[1][3] = b[0*3+2] + b[2*3+0];
F[2][3] = b[1*3+2] + b[2*3+1];
F[3][3] = -b[0*3+0] - b[1*3+1] + b[2*3+2];
JAMA::Eigenvalue<double> eigen(F);
Array1D<double> values;
eigen.getRealEigenvalues(values);
Array2D<double> vectors;
eigen.getV(vectors);
// Compute the RMSD.
double msd = (sumNormRef+b[9]-2*values[3])/numParticles;
if (msd < 1e-20) {
// The particles are perfectly aligned, so all the forces should be zero.
// Numerical error can lead to NaNs, so just return 0 now.
return 0.0;
}
double rmsd = sqrt(msd);
b[9] = rmsd;
// Compute the rotation matrix.
double q[] = {vectors[0][3], vectors[1][3], vectors[2][3], vectors[3][3]};
double q00 = q[0]*q[0], q01 = q[0]*q[1], q02 = q[0]*q[2], q03 = q[0]*q[3];
double q11 = q[1]*q[1], q12 = q[1]*q[2], q13 = q[1]*q[3];
double q22 = q[2]*q[2], q23 = q[2]*q[3];
double q33 = q[3]*q[3];
b[0] = q00+q11-q22-q33;
b[1] = 2*(q12-q03);
b[2] = 2*(q13+q02);
b[3] = 2*(q12+q03);
b[4] = q00-q11+q22-q33;
b[5] = 2*(q23-q01);
b[6] = 2*(q13-q02);
b[7] = 2*(q23+q01);
b[8] = q00-q11-q22+q33;
// Upload it to the device and invoke the kernel to apply forces.
buffer->upload(b);
int paddedNumAtoms = cu.getPaddedNumAtoms();
void* args2[] = {&numParticles, &paddedNumAtoms, &cu.getPosq().getDevicePointer(), &referencePos->getDevicePointer(),
&particles->getDevicePointer(), &buffer->getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(kernel2, args2, numParticles);
return rmsd;
}
void CudaCalcRMSDForceKernel::copyParametersToContext(ContextImpl& context, const RMSDForce& force) {
if (referencePos->getSize() != force.getReferencePositions().size())
throw OpenMMException("updateParametersInContext: The number of reference positions has changed");
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = context.getSystem().getNumParticles();
if (numParticles != particles->getSize()) {
// Recreate the particles array.
delete particles;
particles = NULL;
particles = CudaArray::create<int>(cu, numParticles, "particles");
}
recordParameters(force);
// Mark that the current reordering may be invalid.
info->updateParticles();
cu.invalidateMolecules(info);
}
CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
f4dc3110
...
@@ -92,6 +92,7 @@ CudaPlatform::CudaPlatform() {
...
@@ -92,6 +92,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCVForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCVForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcRMSDForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
...
platforms/cuda/src/kernels/rmsd.cu
0 → 100644
View file @
f4dc3110
// This file contains kernels to compute the RMSD and its gradient using the algorithm described
// in Coutsias et al, "Using quaternions to calculate RMSD" (doi: 10.1002/jcc.20110).
/**
* Sum a value over all threads.
*/
__device__
real
reduceValue
(
real
value
,
real
*
temp
)
{
const
int
thread
=
threadIdx
.
x
;
temp
[
thread
]
=
value
;
__syncthreads
();
for
(
uint
step
=
1
;
step
<
blockDim
.
x
;
step
*=
2
)
{
if
(
thread
+
step
<
blockDim
.
x
&&
thread
%
(
2
*
step
)
==
0
)
temp
[
thread
]
=
temp
[
thread
]
+
temp
[
thread
+
step
];
__syncthreads
();
}
return
temp
[
0
];
}
/**
* Perform the first step of computing the RMSD. This is executed as a single work group.
*/
extern
"C"
__global__
void
computeRMSDPart1
(
int
numParticles
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
referencePos
,
const
int
*
__restrict__
particles
,
real
*
buffer
)
{
extern
__shared__
real
temp
[];
// Compute the center of the particle positions.
real3
center
=
make_real3
(
0
);
for
(
int
i
=
threadIdx
.
x
;
i
<
numParticles
;
i
+=
blockDim
.
x
)
center
+=
trimTo3
(
posq
[
particles
[
i
]]);
center
.
x
=
reduceValue
(
center
.
x
,
temp
)
/
numParticles
;
center
.
y
=
reduceValue
(
center
.
y
,
temp
)
/
numParticles
;
center
.
z
=
reduceValue
(
center
.
z
,
temp
)
/
numParticles
;
// Compute the correlation matrix.
real
R
[
3
][
3
]
=
{{
0
,
0
,
0
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}};
real
sum
=
0
;
for
(
int
i
=
threadIdx
.
x
;
i
<
numParticles
;
i
+=
blockDim
.
x
)
{
int
index
=
particles
[
i
];
real3
pos
=
trimTo3
(
posq
[
index
])
-
center
;
real3
refPos
=
trimTo3
(
referencePos
[
index
]);
R
[
0
][
0
]
+=
pos
.
x
*
refPos
.
x
;
R
[
0
][
1
]
+=
pos
.
x
*
refPos
.
y
;
R
[
0
][
2
]
+=
pos
.
x
*
refPos
.
z
;
R
[
1
][
0
]
+=
pos
.
y
*
refPos
.
x
;
R
[
1
][
1
]
+=
pos
.
y
*
refPos
.
y
;
R
[
1
][
2
]
+=
pos
.
y
*
refPos
.
z
;
R
[
2
][
0
]
+=
pos
.
z
*
refPos
.
x
;
R
[
2
][
1
]
+=
pos
.
z
*
refPos
.
y
;
R
[
2
][
2
]
+=
pos
.
z
*
refPos
.
z
;
sum
+=
dot
(
pos
,
pos
);
}
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
R
[
i
][
j
]
=
reduceValue
(
R
[
i
][
j
],
temp
);
sum
=
reduceValue
(
sum
,
temp
);
// Copy everything into the output buffer to send back to the host.
if
(
threadIdx
.
x
==
0
)
{
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
buffer
[
3
*
i
+
j
]
=
R
[
i
][
j
];
buffer
[
9
]
=
sum
;
buffer
[
10
]
=
center
.
x
;
buffer
[
11
]
=
center
.
y
;
buffer
[
12
]
=
center
.
z
;
}
}
/**
* Apply forces based on the RMSD.
*/
extern
"C"
__global__
void
computeRMSDForces
(
int
numParticles
,
int
paddedNumAtoms
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
referencePos
,
const
int
*
__restrict__
particles
,
const
real
*
buffer
,
unsigned
long
long
*
__restrict__
forceBuffers
)
{
real3
center
=
make_real3
(
buffer
[
10
],
buffer
[
11
],
buffer
[
12
]);
real
scale
=
1
/
(
real
)
(
buffer
[
9
]
*
numParticles
);
for
(
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
i
<
numParticles
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
particles
[
i
];
real3
pos
=
trimTo3
(
posq
[
index
])
-
center
;
real3
refPos
=
trimTo3
(
referencePos
[
index
]);
real3
rotatedRef
=
make_real3
(
buffer
[
0
]
*
refPos
.
x
+
buffer
[
3
]
*
refPos
.
y
+
buffer
[
6
]
*
refPos
.
z
,
buffer
[
1
]
*
refPos
.
x
+
buffer
[
4
]
*
refPos
.
y
+
buffer
[
7
]
*
refPos
.
z
,
buffer
[
2
]
*
refPos
.
x
+
buffer
[
5
]
*
refPos
.
y
+
buffer
[
8
]
*
refPos
.
z
);
real3
force
=
(
rotatedRef
-
pos
)
*
scale
;
atomicAdd
(
&
forceBuffers
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
index
+
paddedNumAtoms
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
index
+
2
*
paddedNumAtoms
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
}
}
platforms/cuda/tests/TestCudaRMSDForce.cpp
0 → 100644
View file @
f4dc3110
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestRMSDForce.h"
void
runPlatformTests
()
{
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment