Commit 3749e84b authored by Peter Eastman's avatar Peter Eastman
Browse files

Various cleanups to integration code

parent 1ef6e1d8
......@@ -39,7 +39,7 @@
namespace OpenMM {
/**
* This is an Integrator which simulates a System using the velocity Verlet algorithm.
* This is an Integrator which simulates a System using the leap-frog Verlet algorithm.
*/
class OPENMM_EXPORT VerletIntegrator : public Integrator {
......
......@@ -488,7 +488,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
prevFriction = friction;
prevStepSize = stepSize;
}
kUpdatePart1(gpu);
kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
......@@ -497,7 +497,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
if (step%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
}
kUpdatePart2(gpu);
kLangevinUpdatePart2(gpu);
kApplySecondShake(gpu);
kApplySecondSettle(gpu);
kApplySecondCCMA(gpu);
......
......@@ -40,14 +40,14 @@ extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern void kCalculateLocalForces(gpuContext gpu);
extern void kCalculateAndersenThermostat(gpuContext gpu);
extern void kReduceBornSumAndForces(gpuContext gpu);
extern void kUpdatePart1(gpuContext gpu);
extern void kApplyFirstShake(gpuContext gpu);
extern void kApplyFirstCCMA(gpuContext gpu);
extern void kApplyFirstSettle(gpuContext gpu);
extern void kUpdatePart2(gpuContext gpu);
extern void kApplySecondShake(gpuContext gpu);
extern void kApplySecondCCMA(gpuContext gpu);
extern void kApplySecondSettle(gpuContext gpu);
extern void kLangevinUpdatePart1(gpuContext gpu);
extern void kLangevinUpdatePart2(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu);
extern void kVerletUpdatePart2(gpuContext gpu);
extern void kBrownianUpdatePart1(gpuContext gpu);
......@@ -72,8 +72,10 @@ extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
extern void GetCalculateAndersenThermostatSim(gpuContext gpu);
extern void SetForcesSim(gpuContext gpu);
extern void GetForcesSim(gpuContext gpu);
extern void SetUpdateShakeHSim(gpuContext gpu);
extern void GetUpdateShakeHSim(gpuContext gpu);
extern void SetShakeHSim(gpuContext gpu);
extern void GetShakeHSim(gpuContext gpu);
extern void SetLangevinUpdateSim(gpuContext gpu);
extern void GetLangevinUpdateSim(gpuContext gpu);
extern void SetSettleSim(gpuContext gpu);
extern void GetSettleSim(gpuContext gpu);
extern void SetCCMASim(gpuContext gpu);
......
......@@ -1757,7 +1757,8 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateObcGbsaForces2Sim(gpu);
SetCalculateAndersenThermostatSim(gpu);
SetForcesSim(gpu);
SetUpdateShakeHSim(gpu);
SetShakeHSim(gpu);
SetLangevinUpdateSim(gpu);
SetVerletUpdateSim(gpu);
SetBrownianUpdateSim(gpu);
SetSettleSim(gpu);
......
......@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static __constant__ cudaGmxSimulation cSim;
void SetBrownianUpdateSim(gpuContext gpu)
......@@ -66,15 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
float4 force = cSim.pForce4[pos];
cSim.pOldPosq[pos] = apos;
#ifndef DeltaShake
apos.x += force.x*cSim.GDT + random4a.x;
apos.y += force.y*cSim.GDT + random4a.y;
apos.z += force.z*cSim.GDT + random4a.z;
#else
apos.x = force.x*cSim.GDT + random4a.x;
apos.y = force.y*cSim.GDT + random4a.y;
apos.z = force.z*cSim.GDT + random4a.z;
#endif
cSim.pPosqP[pos] = apos;
pos += blockDim.x * gridDim.x;
}
......@@ -99,11 +91,6 @@ __global__ void kBrownianUpdatePart2_kernel()
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
......@@ -111,7 +98,7 @@ __global__ void kBrownianUpdatePart2_kernel()
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
using namespace std;
#include "gputypes.h"
static __constant__ cudaGmxSimulation cSim;
void SetLangevinUpdateSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetLangevinUpdateSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
// Include versions of the kernels with and with center of mass motion removal.
#include "kLangevinUpdate.h"
#define REMOVE_CM
#include "kLangevinUpdate.h"
void kLangevinUpdatePart1(gpuContext gpu)
{
// printf("kLangevinUpdatePart1\n");
if (gpu->bRemoveCM)
{
kLangevinUpdatePart1CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kLangevinUpdatePart1CM");
gpu->bRemoveCM = false;
}
else
{
kLangevinUpdatePart1_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kLangevinUpdatePart1");
}
}
extern void kGenerateRandoms(gpuContext gpu);
void kLangevinUpdatePart2(gpuContext gpu)
{
// printf("kLangevinUpdatePart2\n");
if (gpu->bCalculateCM)
{
kLangevinUpdatePart2CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kLangevinUpdatePart2CM");
gpu->bCalculateCM = false;
gpu->bRemoveCM = true;
}
else
{
kLangevinUpdatePart2_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kLangevinUpdatePart2");
}
// Update randoms if necessary
gpu->iterations++;
if (gpu->iterations == gpu->sim.randomIterations)
{
kGenerateRandoms(gpu);
gpu->iterations = 0;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Langevin integration. It is included
* several times in kLangevinUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__ void kLangevinUpdatePart1CM_kernel()
#else
__global__ void kLangevinUpdatePart1_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * cSim.DOverTauC + sqrtInvMass * random4a.x;
Vmh.y = xVector.y * cSim.DOverTauC + sqrtInvMass * random4a.y;
Vmh.z = xVector.z * cSim.DOverTauC + sqrtInvMass * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * random4a.w;
vVector.y = sqrtInvMass * random2a.x;
vVector.z = sqrtInvMass * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * cSim.EM +
velocity.w * force.x * cSim.TauOneMinusEM +
vVector.x -
cSim.EM * Vmh.x;
velocity.y = velocity.y * cSim.EM +
velocity.w * force.y * cSim.TauOneMinusEM +
vVector.y -
cSim.EM * Vmh.y;
velocity.z = velocity.z * cSim.EM +
velocity.w * force.z * cSim.TauOneMinusEM +
vVector.z -
cSim.EM * Vmh.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
cSim.pOldPosq[pos] = apos;
apos.x = velocity.x * cSim.fix1;
apos.y = velocity.y * cSim.fix1;
apos.z = velocity.z * cSim.fix1;
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#ifdef REMOVE_CM
__global__ void kLangevinUpdatePart2CM_kernel()
#else
__global__ void kLangevinUpdatePart2_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
__syncthreads();
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
velocity.x = xPrime.x * cSim.oneOverFix1;
velocity.y = xPrime.y * cSim.oneOverFix1;
velocity.z = xPrime.z * cSim.oneOverFix1;
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
#endif;
Xmh.x = vVector.x * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.x;
Xmh.y = vVector.y * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.y;
Xmh.z = vVector.z * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.z;
xVector.x = sqrtInvMass * random4b.w;
xVector.y = sqrtInvMass * random2b.x;
xVector.z = sqrtInvMass * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x;
}
// Update random position pointer
if (threadIdx.x == 0)
{
rpos += cSim.paddedNumberOfAtoms;
if (rpos > cSim.randoms)
rpos -= cSim.randoms;
cSim.pRandomPosition[blockIdx.x] = rpos;
}
#ifdef REMOVE_CM
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
#endif
}
......@@ -33,8 +33,6 @@
//#include <fstream>
using namespace std;
#define DeltaShake
#include "gputypes.h"
......
......@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static __constant__ cudaGmxSimulation cSim;
void SetVerletUpdateSim(gpuContext gpu)
......@@ -53,102 +51,11 @@ void GetVerletUpdateSim(gpuContext gpu)
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kVerletUpdatePart1_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x;
velocity.y += dtOverMass*force.y;
velocity.z += dtOverMass*force.z;
#ifndef DeltaShake
apos.x += velocity.x*cSim.deltaT;
apos.y += velocity.y*cSim.deltaT;
apos.z += velocity.z*cSim.deltaT;
#else
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kVerletUpdatePart1CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
// Include versions of the kernels with and with center of mass motion removal.
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x-sCM[0].x;
velocity.y += dtOverMass*force.y-sCM[0].y;
velocity.z += dtOverMass*force.z-sCM[0].z;
#ifndef DeltaShake
apos.x += velocity.x*cSim.deltaT;
apos.y += velocity.y*cSim.deltaT;
apos.z += velocity.z*cSim.deltaT;
#else
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#include "kVerletUpdate.h"
#define REMOVE_CM
#include "kVerletUpdate.h"
void kVerletUpdatePart1(gpuContext gpu)
{
......@@ -166,105 +73,6 @@ void kVerletUpdatePart1(gpuContext gpu)
}
}
__global__ void kVerletUpdatePart2_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kVerletUpdatePart2CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
float3 CM = {0.0f, 0.0f, 0.0f};
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
float mass = 1.0f / velocity.w;
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
}
void kVerletUpdatePart2(gpuContext gpu)
{
// printf("kVerletUpdatePart2\n");
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Verlet integration. It is included
* several times in kVerletUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__ void kVerletUpdatePart1CM_kernel()
#else
__global__ void kVerletUpdatePart1_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
#endif
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x;
velocity.y += dtOverMass*force.y;
velocity.z += dtOverMass*force.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#ifdef REMOVE_CM
__global__ void kVerletUpdatePart2CM_kernel()
#else
__global__ void kVerletUpdatePart2_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
#ifdef REMOVE_CM
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
#endif
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment