"platforms/vscode:/vscode.git/clone" did not exist on "c3c6724a5adb65a557b9cc7ee4bbe2c6c3151df3"
Commit 59836b77 authored by peastman's avatar peastman
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents 99b62b77 f769300f
......@@ -762,6 +762,26 @@ where :math:`m_i` and :math:`\mathbf{v}_i` are the mass and velocity of particle
\ *i*\ . It then subtracts :math:`\mathbf{v}_\text{CM}` from the velocity of every
particle.
RMSDForce
*********
RMSDForce computes the root-mean-squared deviation (RMSD) between the current
particle positions :math:`\mathbf{x}_i` and a set of reference positions
:math:`\mathbf{x}_i^\text{ref}`:
.. math::
\text{RMSD} = \sqrt{\frac{\sum_{i} \| \mathbf{x}_i - \mathbf{x}_i^\text{ref} \|^2}{N}}
Before computing this, the reference positions are first translated and rotated
so as to minimize the RMSD. The computed value is therefore :math:`argmin(\text{RMSD})`,
where the :math:`argmin` is taken over all possible translations and rotations.
This force is normally used with a CustomCVForce (see Section :ref:`customcvforce`).
One rarely wants a force whose energy exactly equals the RMSD, but there are many
situations where it is useful to have a restraining or biasing force that depends
on the RMSD in some way.
Custom Forces
#############
......@@ -1158,6 +1178,8 @@ specified in three ways:
* Per-donor parameters are defined by specifying a value for each donor group.
* Per-acceptor parameters are defined by specifying a value for each acceptor group.
.. _customcvforce:
CustomCVForce
*************
......
......@@ -6887,7 +6887,7 @@ double CudaCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel.
int numParticles = particles->getSize();
int blockSize = 128;
int blockSize = 256;
void* args1[] = {&numParticles, &cu.getPosq().getDevicePointer(), &referencePos->getDevicePointer(),
&particles->getDevicePointer(), &buffer->getDevicePointer()};
cu.executeKernel(kernel1, args1, blockSize, blockSize, blockSize*sizeof(REAL));
......
......@@ -35,6 +35,14 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
if (charge == 0)
continue;
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
......@@ -67,12 +75,6 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix;
xbase -= (xbase >= GRID_SIZE_X ? GRID_SIZE_X : 0);
......
......@@ -4,11 +4,16 @@
/**
* Sum a value over all threads.
*/
__device__ real reduceValue(real value, real* temp) {
__device__ real reduceValue(real value, volatile real* temp) {
const int thread = threadIdx.x;
temp[thread] = value;
__syncthreads();
for (uint step = 1; step < blockDim.x; step *= 2) {
for (uint step = 1; step < 32; step *= 2) {
if (thread+step < blockDim.x && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step];
SYNC_WARPS
}
for (uint step = 32; step < blockDim.x; step *= 2) {
if (thread+step < blockDim.x && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step];
__syncthreads();
......@@ -21,7 +26,7 @@ __device__ real reduceValue(real value, real* temp) {
*/
extern "C" __global__ void computeRMSDPart1(int numParticles, const real4* __restrict__ posq, const real4* __restrict__ referencePos,
const int* __restrict__ particles, real* buffer) {
extern __shared__ real temp[];
extern __shared__ volatile real temp[];
// Compute the center of the particle positions.
......
......@@ -7167,7 +7167,7 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel.
int numParticles = particles->getSize();
int blockSize = 128;
int blockSize = min(256, (int) kernel1.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice()));
kernel1.setArg<cl_int>(0, numParticles);
kernel1.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, referencePos->getDeviceBuffer());
......
......@@ -110,6 +110,14 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
if (charge == 0)
continue;
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
......@@ -142,12 +150,6 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix;
xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
......@@ -224,6 +226,8 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
#else
const real charge = pos.w;
#endif
if (charge == 0)
continue;
bool hasComputedThetas = false;
for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix;
......
......@@ -4,11 +4,16 @@
/**
* Sum a value over all threads.
*/
real reduceValue(real value, __local real* temp) {
real reduceValue(real value, __local volatile real* temp) {
const int thread = get_local_id(0);
temp[thread] = value;
barrier(CLK_LOCAL_MEM_FENCE);
for (uint step = 1; step < get_local_size(0); step *= 2) {
for (uint step = 1; step < 32; step *= 2) {
if (thread+step < get_local_size(0) && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step];
SYNC_WARPS
}
for (uint step = 32; step < get_local_size(0); step *= 2) {
if (thread+step < get_local_size(0) && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step];
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -20,7 +25,7 @@ real reduceValue(real value, __local real* temp) {
* Perform the first step of computing the RMSD. This is executed as a single work group.
*/
__kernel void computeRMSDPart1(int numParticles, __global const real4* restrict posq, __global const real4* restrict referencePos,
__global const int* restrict particles, __global real* buffer, __local real* restrict temp) {
__global const int* restrict particles, __global real* buffer, __local volatile real* restrict temp) {
// Compute the center of the particle positions.
real3 center = (real3) 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment