Commit 3749e84b authored by Peter Eastman's avatar Peter Eastman
Browse files

Various cleanups to integration code

parent 1ef6e1d8
......@@ -39,7 +39,7 @@
namespace OpenMM {
/**
* This is an Integrator which simulates a System using the velocity Verlet algorithm.
* This is an Integrator which simulates a System using the leap-frog Verlet algorithm.
*/
class OPENMM_EXPORT VerletIntegrator : public Integrator {
......
......@@ -488,7 +488,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
prevFriction = friction;
prevStepSize = stepSize;
}
kUpdatePart1(gpu);
kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
......@@ -497,7 +497,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
if (step%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
}
kUpdatePart2(gpu);
kLangevinUpdatePart2(gpu);
kApplySecondShake(gpu);
kApplySecondSettle(gpu);
kApplySecondCCMA(gpu);
......
......@@ -40,14 +40,14 @@ extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern void kCalculateLocalForces(gpuContext gpu);
extern void kCalculateAndersenThermostat(gpuContext gpu);
extern void kReduceBornSumAndForces(gpuContext gpu);
extern void kUpdatePart1(gpuContext gpu);
extern void kApplyFirstShake(gpuContext gpu);
extern void kApplyFirstCCMA(gpuContext gpu);
extern void kApplyFirstSettle(gpuContext gpu);
extern void kUpdatePart2(gpuContext gpu);
extern void kApplySecondShake(gpuContext gpu);
extern void kApplySecondCCMA(gpuContext gpu);
extern void kApplySecondSettle(gpuContext gpu);
extern void kLangevinUpdatePart1(gpuContext gpu);
extern void kLangevinUpdatePart2(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu);
extern void kVerletUpdatePart2(gpuContext gpu);
extern void kBrownianUpdatePart1(gpuContext gpu);
......@@ -72,8 +72,10 @@ extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
extern void GetCalculateAndersenThermostatSim(gpuContext gpu);
extern void SetForcesSim(gpuContext gpu);
extern void GetForcesSim(gpuContext gpu);
extern void SetUpdateShakeHSim(gpuContext gpu);
extern void GetUpdateShakeHSim(gpuContext gpu);
extern void SetShakeHSim(gpuContext gpu);
extern void GetShakeHSim(gpuContext gpu);
extern void SetLangevinUpdateSim(gpuContext gpu);
extern void GetLangevinUpdateSim(gpuContext gpu);
extern void SetSettleSim(gpuContext gpu);
extern void GetSettleSim(gpuContext gpu);
extern void SetCCMASim(gpuContext gpu);
......
......@@ -1757,7 +1757,8 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateObcGbsaForces2Sim(gpu);
SetCalculateAndersenThermostatSim(gpu);
SetForcesSim(gpu);
SetUpdateShakeHSim(gpu);
SetShakeHSim(gpu);
SetLangevinUpdateSim(gpu);
SetVerletUpdateSim(gpu);
SetBrownianUpdateSim(gpu);
SetSettleSim(gpu);
......
......@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static __constant__ cudaGmxSimulation cSim;
void SetBrownianUpdateSim(gpuContext gpu)
......@@ -66,15 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
float4 force = cSim.pForce4[pos];
cSim.pOldPosq[pos] = apos;
#ifndef DeltaShake
apos.x += force.x*cSim.GDT + random4a.x;
apos.y += force.y*cSim.GDT + random4a.y;
apos.z += force.z*cSim.GDT + random4a.z;
#else
apos.x = force.x*cSim.GDT + random4a.x;
apos.y = force.y*cSim.GDT + random4a.y;
apos.z = force.z*cSim.GDT + random4a.z;
#endif
cSim.pPosqP[pos] = apos;
pos += blockDim.x * gridDim.x;
}
......@@ -99,11 +91,6 @@ __global__ void kBrownianUpdatePart2_kernel()
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
......@@ -111,7 +98,7 @@ __global__ void kBrownianUpdatePart2_kernel()
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
using namespace std;
#include "gputypes.h"
static __constant__ cudaGmxSimulation cSim;
void SetLangevinUpdateSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetLangevinUpdateSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
// Include versions of the kernels with and with center of mass motion removal.
#include "kLangevinUpdate.h"
#define REMOVE_CM
#include "kLangevinUpdate.h"
void kLangevinUpdatePart1(gpuContext gpu)
{
// printf("kLangevinUpdatePart1\n");
if (gpu->bRemoveCM)
{
kLangevinUpdatePart1CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kLangevinUpdatePart1CM");
gpu->bRemoveCM = false;
}
else
{
kLangevinUpdatePart1_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kLangevinUpdatePart1");
}
}
extern void kGenerateRandoms(gpuContext gpu);
void kLangevinUpdatePart2(gpuContext gpu)
{
// printf("kLangevinUpdatePart2\n");
if (gpu->bCalculateCM)
{
kLangevinUpdatePart2CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kLangevinUpdatePart2CM");
gpu->bCalculateCM = false;
gpu->bRemoveCM = true;
}
else
{
kLangevinUpdatePart2_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kLangevinUpdatePart2");
}
// Update randoms if necessary
gpu->iterations++;
if (gpu->iterations == gpu->sim.randomIterations)
{
kGenerateRandoms(gpu);
gpu->iterations = 0;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Langevin integration. It is included
* several times in kLangevinUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__ void kLangevinUpdatePart1CM_kernel()
#else
__global__ void kLangevinUpdatePart1_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * cSim.DOverTauC + sqrtInvMass * random4a.x;
Vmh.y = xVector.y * cSim.DOverTauC + sqrtInvMass * random4a.y;
Vmh.z = xVector.z * cSim.DOverTauC + sqrtInvMass * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * random4a.w;
vVector.y = sqrtInvMass * random2a.x;
vVector.z = sqrtInvMass * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * cSim.EM +
velocity.w * force.x * cSim.TauOneMinusEM +
vVector.x -
cSim.EM * Vmh.x;
velocity.y = velocity.y * cSim.EM +
velocity.w * force.y * cSim.TauOneMinusEM +
vVector.y -
cSim.EM * Vmh.y;
velocity.z = velocity.z * cSim.EM +
velocity.w * force.z * cSim.TauOneMinusEM +
vVector.z -
cSim.EM * Vmh.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
cSim.pOldPosq[pos] = apos;
apos.x = velocity.x * cSim.fix1;
apos.y = velocity.y * cSim.fix1;
apos.z = velocity.z * cSim.fix1;
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#ifdef REMOVE_CM
__global__ void kLangevinUpdatePart2CM_kernel()
#else
__global__ void kLangevinUpdatePart2_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
__syncthreads();
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
velocity.x = xPrime.x * cSim.oneOverFix1;
velocity.y = xPrime.y * cSim.oneOverFix1;
velocity.z = xPrime.z * cSim.oneOverFix1;
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
#endif;
Xmh.x = vVector.x * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.x;
Xmh.y = vVector.y * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.y;
Xmh.z = vVector.z * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.z;
xVector.x = sqrtInvMass * random4b.w;
xVector.y = sqrtInvMass * random2b.x;
xVector.z = sqrtInvMass * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x;
}
// Update random position pointer
if (threadIdx.x == 0)
{
rpos += cSim.paddedNumberOfAtoms;
if (rpos > cSim.randoms)
rpos -= cSim.randoms;
cSim.pRandomPosition[blockIdx.x] = rpos;
}
#ifdef REMOVE_CM
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
#endif
}
......@@ -33,8 +33,6 @@
//#include <fstream>
using namespace std;
#define DeltaShake
#include "gputypes.h"
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using namespace std;
#define DeltaShake
#include "gputypes.h"
struct Atom
{
float3 rij1;
float3 rij2;
float3 rij3;
float M;
float d2;
float InvMassI;
float rij1sq;
float rij2sq;
float rij3sq;
};
static __constant__ cudaGmxSimulation cSim;
void SetUpdateShakeHSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetUpdateShakeHSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kUpdatePart1_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
__syncthreads();
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * cSim.DOverTauC + sqrtInvMass * random4a.x;
Vmh.y = xVector.y * cSim.DOverTauC + sqrtInvMass * random4a.y;
Vmh.z = xVector.z * cSim.DOverTauC + sqrtInvMass * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * random4a.w;
vVector.y = sqrtInvMass * random2a.x;
vVector.z = sqrtInvMass * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * cSim.EM +
velocity.w * force.x * cSim.TauOneMinusEM +
vVector.x -
cSim.EM * Vmh.x;
velocity.y = velocity.y * cSim.EM +
velocity.w * force.y * cSim.TauOneMinusEM +
vVector.y -
cSim.EM * Vmh.y;
velocity.z = velocity.z * cSim.EM +
velocity.w * force.z * cSim.TauOneMinusEM +
vVector.z -
cSim.EM * Vmh.z;
cSim.pOldPosq[pos] = apos;
#ifndef DeltaShake
apos.x += velocity.x * cSim.fix1;
apos.y += velocity.y * cSim.fix1;
apos.z += velocity.z * cSim.fix1;
#else
apos.x = velocity.x * cSim.fix1;
apos.y = velocity.y * cSim.fix1;
apos.z = velocity.z * cSim.fix1;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kUpdatePart1CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
#if 0
float4 CM2 = { 0.0f, 0.0f, 0.0f, 0.0f };
float4 CM3 = { 0.0f, 0.0f, 0.0f, 0.0f };
float4 CM4 = { 0.0f, 0.0f, 0.0f, 0.0f };
if (cpos < gridDim.x)
CM1 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM2 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM3 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM4 = cSim.pLinearMomentum[cpos];
sCM[threadIdx.x].x = CM1.x + CM2.x + CM3.x + CM4.x;
sCM[threadIdx.x].y = CM1.y + CM2.y + CM3.y + CM4.y;
sCM[threadIdx.x].z = CM1.z + CM2.z + CM3.z + CM4.z;
#else
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
#endif
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * cSim.DOverTauC + sqrtInvMass * random4a.x;
Vmh.y = xVector.y * cSim.DOverTauC + sqrtInvMass * random4a.y;
Vmh.z = xVector.z * cSim.DOverTauC + sqrtInvMass * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * random4a.w;
vVector.y = sqrtInvMass * random2a.x;
vVector.z = sqrtInvMass * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * cSim.EM +
velocity.w * force.x * cSim.TauOneMinusEM +
vVector.x -
cSim.EM * Vmh.x -
sCM[0].x;
velocity.y = velocity.y * cSim.EM +
velocity.w * force.y * cSim.TauOneMinusEM +
vVector.y -
cSim.EM * Vmh.y -
sCM[0].y;
velocity.z = velocity.z * cSim.EM +
velocity.w * force.z * cSim.TauOneMinusEM +
vVector.z -
cSim.EM * Vmh.z -
sCM[0].z;
cSim.pOldPosq[pos] = apos;
#ifndef DeltaShake
apos.x += velocity.x * cSim.fix1;
apos.y += velocity.y * cSim.fix1;
apos.z += velocity.z * cSim.fix1;
#else
apos.x = velocity.x * cSim.fix1;
apos.y = velocity.y * cSim.fix1;
apos.z = velocity.z * cSim.fix1;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
void kUpdatePart1(gpuContext gpu)
{
// printf("kUpdatePart1\n");
#if 0
static int iteration = 0;
if (iteration == 0)
{
gpu->psPosq4->Download();
gpu->psVelm4->Download();
printf("# %d atoms\n", gpu->natoms);
for (int i = 0; i < gpu->natoms; i++)
{
printf("%5d %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x, gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z, gpu->psPosq4->_pSysStream[0][i].w,
gpu->psVelm4->_pSysStream[0][i].x, gpu->psVelm4->_pSysStream[0][i].y,
gpu->psVelm4->_pSysStream[0][i].z, gpu->psVelm4->_pSysStream[0][i].w
);
}
}
iteration++;
#endif
#if 0
static const float KILO = 1e3; // Thousand
static const float BOLTZMANN = 1.380658e-23f; // (J/K)
static const float AVOGADRO = 6.0221367e23f; // ()
static const float RGAS = BOLTZMANN * AVOGADRO; // (J/(mol K))
static const float BOLTZ = (RGAS / KILO); // (kJ/(mol K))
static int iteration = 0;
// Check T
if (iteration % 1000 == 0)
{
gpu->psVelm4->Download();
float ke = 0.0f;
for (int i = 0; i < gpu->natoms; i++)
{
float vx = gpu->psVelm4->_pSysStream[0][i].x;
float vy = gpu->psVelm4->_pSysStream[0][i].y;
float vz = gpu->psVelm4->_pSysStream[0][i].z;
float m = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
ke += m * (vx * vx + vy * vy + vz * vz);
}
float T = ke / (BOLTZ * gpu->sim.degreesOfFreedom);
printf("Iteration %d, Temperature is %f\n", iteration, T);
}
iteration++;
#endif
if (gpu->bRemoveCM)
{
kUpdatePart1CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kUpdatePart1CM");
gpu->bRemoveCM = false;
#if 0
gpu->psLinearMomentum->Download();
gpu->psVelm4->Download();
float3 mv = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->natoms; i++)
{
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
}
mv.x *= gpu->sim.inverseTotalMass;
mv.y *= gpu->sim.inverseTotalMass;
mv.z *= gpu->sim.inverseTotalMass;
float3 mv1 = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->sim.blocks; i++)
{
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
}
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
#endif
}
else
{
kUpdatePart1_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kUpdatePart1");
}
}
__global__ void kApplyFirstShake_kernel()
{
__shared__ Atom sA[G8X_THREADS_PER_BLOCK];
Atom* psA = &sA[threadIdx.x];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.ShakeConstraints)
{
int4 atomID = cSim.pShakeID[pos];
float4 params = cSim.pShakeParameter[pos];
float4 apos = cSim.pOldPosq[atomID.x];
float4 xpi = cSim.pPosqP[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xpj1 = cSim.pPosqP[atomID.y];
float4 apos2 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
psA->InvMassI = params.x;
psA->M = params.y;
psA->d2 = params.z;
float invMassJ = params.w;
if (atomID.z != -1)
{
apos2 = cSim.pOldPosq[atomID.z];
xpj2 = cSim.pPosqP[atomID.z];
}
float4 apos3 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
if (atomID.w != -1)
{
apos3 = cSim.pOldPosq[atomID.w];
xpj3 = cSim.pPosqP[atomID.w];
}
float3 xi, xj1, xj2, xj3;
xi.x = apos.x;
xi.y = apos.y;
xi.z = apos.z;
xj1.x = apos1.x;
xj1.y = apos1.y;
xj1.z = apos1.z;
xj2.x = apos2.x;
xj2.y = apos2.y;
xj2.z = apos2.z;
xj3.x = apos3.x;
xj3.y = apos3.y;
xj3.z = apos3.z;
#ifndef DeltaShake
xpi.x -= xi.x;
xpi.y -= xi.y;
xpi.z -= xi.z;
xpj1.x -= xj1.x;
xpj1.y -= xj1.y;
xpj1.z -= xj1.z;
xpj2.x -= xj2.x;
xpj2.y -= xj2.y;
xpj2.z -= xj2.z;
xpj3.x -= xj3.x;
xpj3.y -= xj3.y;
xpj3.z -= xj3.z;
#endif
psA->rij1.x = xi.x - xj1.x;
psA->rij1.y = xi.y - xj1.y;
psA->rij1.z = xi.z - xj1.z;
psA->rij2.x = xi.x - xj2.x;
psA->rij2.y = xi.y - xj2.y;
psA->rij2.z = xi.z - xj2.z;
psA->rij3.x = xi.x - xj3.x;
psA->rij3.y = xi.y - xj3.y;
psA->rij3.z = xi.z - xj3.z;
psA->rij1sq = psA->rij1.x * psA->rij1.x + psA->rij1.y * psA->rij1.y + psA->rij1.z * psA->rij1.z;
psA->rij2sq = psA->rij2.x * psA->rij2.x + psA->rij2.y * psA->rij2.y + psA->rij2.z * psA->rij2.z;
psA->rij3sq = psA->rij3.x * psA->rij3.x + psA->rij3.y * psA->rij3.y + psA->rij3.z * psA->rij3.z;
float ld1 = psA->d2 - psA->rij1sq;
float ld2 = psA->d2 - psA->rij2sq;
float ld3 = psA->d2 - psA->rij3sq;
bool converged = false;
int iteration = 0;
while (iteration < 15 && !converged)
{
converged = true;
float3 rpij;
rpij.x = xpi.x - xpj1.x;
rpij.y = xpi.y - xpj1.y;
rpij.z = xpi.z - xpj1.z;
float rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
float rrpr = psA->rij1.x * rpij.x + psA->rij1.y * rpij.y + psA->rij1.z * rpij.z;
float diff = fabs(ld1 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld1 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij1sq);
float3 dr;
dr.x = psA->rij1.x * acor;
dr.y = psA->rij1.y * acor;
dr.z = psA->rij1.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj1.x -= dr.x * invMassJ;
xpj1.y -= dr.y * invMassJ;
xpj1.z -= dr.z * invMassJ;
converged = false;
}
if (atomID.z != -1)
{
rpij.x = xpi.x - xpj2.x;
rpij.y = xpi.y - xpj2.y;
rpij.z = xpi.z - xpj2.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij2.x * rpij.x + psA->rij2.y * rpij.y + psA->rij2.z * rpij.z;
diff = fabs(ld2 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld2 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij2sq);
float3 dr;
dr.x = psA->rij2.x * acor;
dr.y = psA->rij2.y * acor;
dr.z = psA->rij2.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj2.x -= dr.x * invMassJ;
xpj2.y -= dr.y * invMassJ;
xpj2.z -= dr.z * invMassJ;
converged = false;
}
}
if (atomID.w != -1)
{
rpij.x = xpi.x - xpj3.x;
rpij.y = xpi.y - xpj3.y;
rpij.z = xpi.z - xpj3.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij3.x * rpij.x + psA->rij3.y * rpij.y + psA->rij3.z * rpij.z;
diff = fabs(ld3 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld3 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij3sq);
float3 dr;
dr.x = psA->rij3.x * acor;
dr.y = psA->rij3.y * acor;
dr.z = psA->rij3.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj3.x -= dr.x * invMassJ;
xpj3.y -= dr.y * invMassJ;
xpj3.z -= dr.z * invMassJ;
converged = false;
}
}
iteration++;
}
#ifndef DeltaShake
xpi.x += xi.x;
xpi.y += xi.y;
xpi.z += xi.z;
xpj1.x += xj1.x;
xpj1.y += xj1.y;
xpj1.z += xj1.z;
xpj2.x += xj2.x;
xpj2.y += xj2.y;
xpj2.z += xj2.z;
xpj3.x += xj3.x;
xpj3.y += xj3.y;
xpj3.z += xj3.z;
#endif
cSim.pPosqP[atomID.x] = xpi;
cSim.pPosqP[atomID.y] = xpj1;
if (atomID.z != -1)
cSim.pPosqP[atomID.z] = xpj2;
if (atomID.w != -1)
cSim.pPosqP[atomID.w] = xpj3;
pos += blockDim.x * gridDim.x;
}
}
void kApplyFirstShake(gpuContext gpu)
{
// printf("kApplyFirstShake\n");
if (gpu->sim.ShakeConstraints > 0)
{
kApplyFirstShake_kernel<<<gpu->sim.blocks, gpu->sim.shake_threads_per_block>>>();
LAUNCHERROR("kApplyFirstShake");
}
}
__global__ void kUpdatePart2_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
__syncthreads();
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
#ifndef DeltaShake
float4 apos = cSim.pPosq[pos];
#endif
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
#ifdef DeltaShake
velocity.x = xPrime.x * cSim.oneOverFix1;
velocity.y = xPrime.y * cSim.oneOverFix1;
velocity.z = xPrime.z * cSim.oneOverFix1;
#else
velocity.x = (xPrime.x - apos.x) * cSim.oneOverFix1;
velocity.y = (xPrime.y - apos.y) * cSim.oneOverFix1;
velocity.z = (xPrime.z - apos.z) * cSim.oneOverFix1;
#endif
Xmh.x = vVector.x * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.x;
Xmh.y = vVector.y * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.y;
Xmh.z = vVector.z * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.z;
xVector.x = sqrtInvMass * random4b.w;
xVector.y = sqrtInvMass * random2b.x;
xVector.z = sqrtInvMass * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x;
}
// Update random position pointer
if (threadIdx.x == 0)
{
rpos += cSim.paddedNumberOfAtoms;
if (rpos > cSim.randoms)
rpos -= cSim.randoms;
cSim.pRandomPosition[blockIdx.x] = rpos;
}
}
__global__ void kUpdatePart2CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float3 CM = {0.0f, 0.0f, 0.0f};
__syncthreads();
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
#ifndef DeltaShake
float4 apos = cSim.pPosq[pos];
#endif
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float mass = 1.0f / velocity.w;
float sqrtInvMass = sqrt(velocity.w);
#ifdef DeltaShake
velocity.x = xPrime.x * cSim.oneOverFix1;
velocity.y = xPrime.y * cSim.oneOverFix1;
velocity.z = xPrime.z * cSim.oneOverFix1;
#else
velocity.x = (xPrime.x - apos.x) * cSim.oneOverFix1;
velocity.y = (xPrime.y - apos.y) * cSim.oneOverFix1;
velocity.z = (xPrime.z - apos.z) * cSim.oneOverFix1;
#endif
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
Xmh.x = vVector.x * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.x;
Xmh.y = vVector.y * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.y;
Xmh.z = vVector.z * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.z;
xVector.x = sqrtInvMass * random4b.w;
xVector.y = sqrtInvMass * random2b.x;
xVector.z = sqrtInvMass * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x;
}
// Update random position pointer
if (threadIdx.x == 0)
{
rpos += cSim.paddedNumberOfAtoms;
if (rpos > cSim.randoms)
rpos -= cSim.randoms;
cSim.pRandomPosition[blockIdx.x] = rpos;
}
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
}
extern void kGenerateRandoms(gpuContext gpu);
void kUpdatePart2(gpuContext gpu)
{
// printf("kUpdatePart2\n");
if (gpu->bCalculateCM)
{
kUpdatePart2CM_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, gpu->sim.update_threads_per_block * sizeof(float3)>>>();
LAUNCHERROR("kUpdatePart2CM");
gpu->bCalculateCM = false;
gpu->bRemoveCM = true;
#if 0
gpu->psLinearMomentum->Download();
gpu->psVelm4->Download();
float3 mv = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->natoms; i++)
{
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
}
mv.x *= gpu->sim.inverseTotalMass;
mv.y *= gpu->sim.inverseTotalMass;
mv.z *= gpu->sim.inverseTotalMass;
float3 mv1 = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->sim.blocks; i++)
{
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
}
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
#endif
}
else
{
kUpdatePart2_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kUpdatePart2");
}
// Update randoms if necessary
gpu->iterations++;
if (gpu->iterations == gpu->sim.randomIterations)
{
kGenerateRandoms(gpu);
gpu->iterations = 0;
}
}
__global__ void kApplySecondShake_kernel()
{
__shared__ Atom sA[G8X_THREADS_PER_BLOCK];
Atom* psA = &sA[threadIdx.x];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.ShakeConstraints)
{
int4 atomID = cSim.pShakeID[pos];
float4 params = cSim.pShakeParameter[pos];
float4 apos = cSim.pOldPosq[atomID.x];
float4 xpi = cSim.pPosq[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xpj1 = cSim.pPosq[atomID.y];
float4 apos2 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
psA->InvMassI = params.x;
psA->M = params.y;
psA->d2 = params.z;
float invMassJ = params.w;
if (atomID.z != -1)
{
apos2 = cSim.pOldPosq[atomID.z];
xpj2 = cSim.pPosq[atomID.z];
}
float4 apos3 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
if (atomID.w != -1)
{
apos3 = cSim.pOldPosq[atomID.w];
xpj3 = cSim.pPosq[atomID.w];
}
float3 xi, xj1, xj2, xj3;
xi.x = apos.x;
xi.y = apos.y;
xi.z = apos.z;
xj1.x = apos1.x;
xj1.y = apos1.y;
xj1.z = apos1.z;
xj2.x = apos2.x;
xj2.y = apos2.y;
xj2.z = apos2.z;
xj3.x = apos3.x;
xj3.y = apos3.y;
xj3.z = apos3.z;
#ifndef DeltaShake
xpi.x -= xi.x;
xpi.y -= xi.y;
xpi.z -= xi.z;
xpj1.x -= xj1.x;
xpj1.y -= xj1.y;
xpj1.z -= xj1.z;
xpj2.x -= xj2.x;
xpj2.y -= xj2.y;
xpj2.z -= xj2.z;
xpj3.x -= xj3.x;
xpj3.y -= xj3.y;
xpj3.z -= xj3.z;
#endif
psA->rij1.x = xi.x - xj1.x;
psA->rij1.y = xi.y - xj1.y;
psA->rij1.z = xi.z - xj1.z;
psA->rij2.x = xi.x - xj2.x;
psA->rij2.y = xi.y - xj2.y;
psA->rij2.z = xi.z - xj2.z;
psA->rij3.x = xi.x - xj3.x;
psA->rij3.y = xi.y - xj3.y;
psA->rij3.z = xi.z - xj3.z;
psA->rij1sq = psA->rij1.x * psA->rij1.x + psA->rij1.y * psA->rij1.y + psA->rij1.z * psA->rij1.z;
psA->rij2sq = psA->rij2.x * psA->rij2.x + psA->rij2.y * psA->rij2.y + psA->rij2.z * psA->rij2.z;
psA->rij3sq = psA->rij3.x * psA->rij3.x + psA->rij3.y * psA->rij3.y + psA->rij3.z * psA->rij3.z;
float ld1 = psA->d2 - psA->rij1sq;
float ld2 = psA->d2 - psA->rij2sq;
float ld3 = psA->d2 - psA->rij3sq;
bool converged = false;
int iteration = 0;
while (iteration < 15 && !converged)
{
converged = true;
float3 rpij;
rpij.x = xpi.x - xpj1.x;
rpij.y = xpi.y - xpj1.y;
rpij.z = xpi.z - xpj1.z;
float rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
float rrpr = psA->rij1.x * rpij.x + psA->rij1.y * rpij.y + psA->rij1.z * rpij.z;
float diff = fabs(ld1 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld1 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij1sq);
float3 dr;
dr.x = psA->rij1.x * acor;
dr.y = psA->rij1.y * acor;
dr.z = psA->rij1.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj1.x -= dr.x * invMassJ;
xpj1.y -= dr.y * invMassJ;
xpj1.z -= dr.z * invMassJ;
converged = false;
}
if (atomID.z != -1)
{
rpij.x = xpi.x - xpj2.x;
rpij.y = xpi.y - xpj2.y;
rpij.z = xpi.z - xpj2.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij2.x * rpij.x + psA->rij2.y * rpij.y + psA->rij2.z * rpij.z;
diff = fabs(ld2 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld2 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij2sq);
float3 dr;
dr.x = psA->rij2.x * acor;
dr.y = psA->rij2.y * acor;
dr.z = psA->rij2.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj2.x -= dr.x * invMassJ;
xpj2.y -= dr.y * invMassJ;
xpj2.z -= dr.z * invMassJ;
converged = false;
}
}
if (atomID.w != -1)
{
rpij.x = xpi.x - xpj3.x;
rpij.y = xpi.y - xpj3.y;
rpij.z = xpi.z - xpj3.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij3.x * rpij.x + psA->rij3.y * rpij.y + psA->rij3.z * rpij.z;
diff = fabs(ld3 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld3 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij3sq);
float3 dr;
dr.x = psA->rij3.x * acor;
dr.y = psA->rij3.y * acor;
dr.z = psA->rij3.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj3.x -= dr.x * invMassJ;
xpj3.y -= dr.y * invMassJ;
xpj3.z -= dr.z * invMassJ;
converged = false;
}
}
iteration++;
}
xpi.x += xi.x;
xpi.y += xi.y;
xpi.z += xi.z;
xpj1.x += xj1.x;
xpj1.y += xj1.y;
xpj1.z += xj1.z;
xpj2.x += xj2.x;
xpj2.y += xj2.y;
xpj2.z += xj2.z;
xpj3.x += xj3.x;
xpj3.y += xj3.y;
xpj3.z += xj3.z;
cSim.pPosq[atomID.x] = xpi;
cSim.pPosq[atomID.y] = xpj1;
if (atomID.z != -1)
cSim.pPosq[atomID.z] = xpj2;
if (atomID.w != -1)
cSim.pPosq[atomID.w] = xpj3;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kApplyNoShake_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.NonShakeConstraints)
{
int atomID = cSim.pNonShakeID[pos];
float4 apos = cSim.pOldPosq[atomID];
float4 xpi = cSim.pPosq[atomID];
xpi.x += apos.x;
xpi.y += apos.y;
xpi.z += apos.z;
cSim.pPosq[atomID] = xpi;
pos += blockDim.x * gridDim.x;
}
}
void kCPUShake2(gpuContext gpu)
{
}
void kApplySecondShake(gpuContext gpu)
{
// printf("kApplySecondShake\n");
// kCPUShake2(gpu);
if (gpu->sim.ShakeConstraints > 0)
{
kApplySecondShake_kernel<<<gpu->sim.blocks, gpu->sim.shake_threads_per_block>>>();
LAUNCHERROR("kApplySecondShake");
}
// handle non-Shake atoms
#ifdef DeltaShake
if (gpu->sim.NonShakeConstraints > 0)
{
//fprintf( gpu->log, "kApplyNoShake_kernel %d %d \n", gpu->sim.blocks, gpu->sim.nonshake_threads_per_block); fflush( gpu->log );
kApplyNoShake_kernel<<<gpu->sim.blocks, gpu->sim.nonshake_threads_per_block>>>();
LAUNCHERROR("kApplyNoShake");
}
#endif
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
using namespace std;
#include "gputypes.h"
struct Atom
{
float3 rij1;
float3 rij2;
float3 rij3;
float M;
float d2;
float InvMassI;
float rij1sq;
float rij2sq;
float rij3sq;
};
static __constant__ cudaGmxSimulation cSim;
void SetShakeHSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetShakeHSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kApplyFirstShake_kernel()
{
__shared__ Atom sA[G8X_THREADS_PER_BLOCK];
Atom* psA = &sA[threadIdx.x];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.ShakeConstraints)
{
int4 atomID = cSim.pShakeID[pos];
float4 params = cSim.pShakeParameter[pos];
float4 apos = cSim.pOldPosq[atomID.x];
float4 xpi = cSim.pPosqP[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xpj1 = cSim.pPosqP[atomID.y];
float4 apos2 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
psA->InvMassI = params.x;
psA->M = params.y;
psA->d2 = params.z;
float invMassJ = params.w;
if (atomID.z != -1)
{
apos2 = cSim.pOldPosq[atomID.z];
xpj2 = cSim.pPosqP[atomID.z];
}
float4 apos3 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
if (atomID.w != -1)
{
apos3 = cSim.pOldPosq[atomID.w];
xpj3 = cSim.pPosqP[atomID.w];
}
float3 xi, xj1, xj2, xj3;
xi.x = apos.x;
xi.y = apos.y;
xi.z = apos.z;
xj1.x = apos1.x;
xj1.y = apos1.y;
xj1.z = apos1.z;
xj2.x = apos2.x;
xj2.y = apos2.y;
xj2.z = apos2.z;
xj3.x = apos3.x;
xj3.y = apos3.y;
xj3.z = apos3.z;
psA->rij1.x = xi.x - xj1.x;
psA->rij1.y = xi.y - xj1.y;
psA->rij1.z = xi.z - xj1.z;
psA->rij2.x = xi.x - xj2.x;
psA->rij2.y = xi.y - xj2.y;
psA->rij2.z = xi.z - xj2.z;
psA->rij3.x = xi.x - xj3.x;
psA->rij3.y = xi.y - xj3.y;
psA->rij3.z = xi.z - xj3.z;
psA->rij1sq = psA->rij1.x * psA->rij1.x + psA->rij1.y * psA->rij1.y + psA->rij1.z * psA->rij1.z;
psA->rij2sq = psA->rij2.x * psA->rij2.x + psA->rij2.y * psA->rij2.y + psA->rij2.z * psA->rij2.z;
psA->rij3sq = psA->rij3.x * psA->rij3.x + psA->rij3.y * psA->rij3.y + psA->rij3.z * psA->rij3.z;
float ld1 = psA->d2 - psA->rij1sq;
float ld2 = psA->d2 - psA->rij2sq;
float ld3 = psA->d2 - psA->rij3sq;
bool converged = false;
int iteration = 0;
while (iteration < 15 && !converged)
{
converged = true;
float3 rpij;
rpij.x = xpi.x - xpj1.x;
rpij.y = xpi.y - xpj1.y;
rpij.z = xpi.z - xpj1.z;
float rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
float rrpr = psA->rij1.x * rpij.x + psA->rij1.y * rpij.y + psA->rij1.z * rpij.z;
float diff = fabs(ld1 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld1 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij1sq);
float3 dr;
dr.x = psA->rij1.x * acor;
dr.y = psA->rij1.y * acor;
dr.z = psA->rij1.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj1.x -= dr.x * invMassJ;
xpj1.y -= dr.y * invMassJ;
xpj1.z -= dr.z * invMassJ;
converged = false;
}
if (atomID.z != -1)
{
rpij.x = xpi.x - xpj2.x;
rpij.y = xpi.y - xpj2.y;
rpij.z = xpi.z - xpj2.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij2.x * rpij.x + psA->rij2.y * rpij.y + psA->rij2.z * rpij.z;
diff = fabs(ld2 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld2 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij2sq);
float3 dr;
dr.x = psA->rij2.x * acor;
dr.y = psA->rij2.y * acor;
dr.z = psA->rij2.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj2.x -= dr.x * invMassJ;
xpj2.y -= dr.y * invMassJ;
xpj2.z -= dr.z * invMassJ;
converged = false;
}
}
if (atomID.w != -1)
{
rpij.x = xpi.x - xpj3.x;
rpij.y = xpi.y - xpj3.y;
rpij.z = xpi.z - xpj3.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij3.x * rpij.x + psA->rij3.y * rpij.y + psA->rij3.z * rpij.z;
diff = fabs(ld3 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance);
if (diff >= 1.0f)
{
float acor = (ld3 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij3sq);
float3 dr;
dr.x = psA->rij3.x * acor;
dr.y = psA->rij3.y * acor;
dr.z = psA->rij3.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj3.x -= dr.x * invMassJ;
xpj3.y -= dr.y * invMassJ;
xpj3.z -= dr.z * invMassJ;
converged = false;
}
}
iteration++;
}
cSim.pPosqP[atomID.x] = xpi;
cSim.pPosqP[atomID.y] = xpj1;
if (atomID.z != -1)
cSim.pPosqP[atomID.z] = xpj2;
if (atomID.w != -1)
cSim.pPosqP[atomID.w] = xpj3;
pos += blockDim.x * gridDim.x;
}
}
void kApplyFirstShake(gpuContext gpu)
{
// printf("kApplyFirstShake\n");
if (gpu->sim.ShakeConstraints > 0)
{
kApplyFirstShake_kernel<<<gpu->sim.blocks, gpu->sim.shake_threads_per_block>>>();
LAUNCHERROR("kApplyFirstShake");
}
}
__global__ void kApplySecondShake_kernel()
{
__shared__ Atom sA[G8X_THREADS_PER_BLOCK];
Atom* psA = &sA[threadIdx.x];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.ShakeConstraints)
{
int4 atomID = cSim.pShakeID[pos];
float4 params = cSim.pShakeParameter[pos];
float4 apos = cSim.pOldPosq[atomID.x];
float4 xpi = cSim.pPosq[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xpj1 = cSim.pPosq[atomID.y];
float4 apos2 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
psA->InvMassI = params.x;
psA->M = params.y;
psA->d2 = params.z;
float invMassJ = params.w;
if (atomID.z != -1)
{
apos2 = cSim.pOldPosq[atomID.z];
xpj2 = cSim.pPosq[atomID.z];
}
float4 apos3 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
if (atomID.w != -1)
{
apos3 = cSim.pOldPosq[atomID.w];
xpj3 = cSim.pPosq[atomID.w];
}
float3 xi, xj1, xj2, xj3;
xi.x = apos.x;
xi.y = apos.y;
xi.z = apos.z;
xj1.x = apos1.x;
xj1.y = apos1.y;
xj1.z = apos1.z;
xj2.x = apos2.x;
xj2.y = apos2.y;
xj2.z = apos2.z;
xj3.x = apos3.x;
xj3.y = apos3.y;
xj3.z = apos3.z;
psA->rij1.x = xi.x - xj1.x;
psA->rij1.y = xi.y - xj1.y;
psA->rij1.z = xi.z - xj1.z;
psA->rij2.x = xi.x - xj2.x;
psA->rij2.y = xi.y - xj2.y;
psA->rij2.z = xi.z - xj2.z;
psA->rij3.x = xi.x - xj3.x;
psA->rij3.y = xi.y - xj3.y;
psA->rij3.z = xi.z - xj3.z;
psA->rij1sq = psA->rij1.x * psA->rij1.x + psA->rij1.y * psA->rij1.y + psA->rij1.z * psA->rij1.z;
psA->rij2sq = psA->rij2.x * psA->rij2.x + psA->rij2.y * psA->rij2.y + psA->rij2.z * psA->rij2.z;
psA->rij3sq = psA->rij3.x * psA->rij3.x + psA->rij3.y * psA->rij3.y + psA->rij3.z * psA->rij3.z;
float ld1 = psA->d2 - psA->rij1sq;
float ld2 = psA->d2 - psA->rij2sq;
float ld3 = psA->d2 - psA->rij3sq;
bool converged = false;
int iteration = 0;
while (iteration < 15 && !converged)
{
converged = true;
float3 rpij;
rpij.x = xpi.x - xpj1.x;
rpij.y = xpi.y - xpj1.y;
rpij.z = xpi.z - xpj1.z;
float rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
float rrpr = psA->rij1.x * rpij.x + psA->rij1.y * rpij.y + psA->rij1.z * rpij.z;
float diff = fabs(ld1 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld1 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij1sq);
float3 dr;
dr.x = psA->rij1.x * acor;
dr.y = psA->rij1.y * acor;
dr.z = psA->rij1.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj1.x -= dr.x * invMassJ;
xpj1.y -= dr.y * invMassJ;
xpj1.z -= dr.z * invMassJ;
converged = false;
}
if (atomID.z != -1)
{
rpij.x = xpi.x - xpj2.x;
rpij.y = xpi.y - xpj2.y;
rpij.z = xpi.z - xpj2.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij2.x * rpij.x + psA->rij2.y * rpij.y + psA->rij2.z * rpij.z;
diff = fabs(ld2 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld2 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij2sq);
float3 dr;
dr.x = psA->rij2.x * acor;
dr.y = psA->rij2.y * acor;
dr.z = psA->rij2.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj2.x -= dr.x * invMassJ;
xpj2.y -= dr.y * invMassJ;
xpj2.z -= dr.z * invMassJ;
converged = false;
}
}
if (atomID.w != -1)
{
rpij.x = xpi.x - xpj3.x;
rpij.y = xpi.y - xpj3.y;
rpij.z = xpi.z - xpj3.z;
rpsqij = rpij.x * rpij.x + rpij.y * rpij.y + rpij.z * rpij.z;
rrpr = psA->rij3.x * rpij.x + psA->rij3.y * rpij.y + psA->rij3.z * rpij.z;
diff = fabs(ld3 - 2.0f * rrpr - rpsqij) / (psA->d2 * cSim.shakeTolerance );
if (diff >= 1.0f)
{
float acor = (ld3 - 2.0f * rrpr - rpsqij) * psA->M / (rrpr + psA->rij3sq);
float3 dr;
dr.x = psA->rij3.x * acor;
dr.y = psA->rij3.y * acor;
dr.z = psA->rij3.z * acor;
xpi.x += dr.x * psA->InvMassI;
xpi.y += dr.y * psA->InvMassI;
xpi.z += dr.z * psA->InvMassI;
xpj3.x -= dr.x * invMassJ;
xpj3.y -= dr.y * invMassJ;
xpj3.z -= dr.z * invMassJ;
converged = false;
}
}
iteration++;
}
xpi.x += xi.x;
xpi.y += xi.y;
xpi.z += xi.z;
xpj1.x += xj1.x;
xpj1.y += xj1.y;
xpj1.z += xj1.z;
xpj2.x += xj2.x;
xpj2.y += xj2.y;
xpj2.z += xj2.z;
xpj3.x += xj3.x;
xpj3.y += xj3.y;
xpj3.z += xj3.z;
cSim.pPosq[atomID.x] = xpi;
cSim.pPosq[atomID.y] = xpj1;
if (atomID.z != -1)
cSim.pPosq[atomID.z] = xpj2;
if (atomID.w != -1)
cSim.pPosq[atomID.w] = xpj3;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kApplyNoShake_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.NonShakeConstraints)
{
int atomID = cSim.pNonShakeID[pos];
float4 apos = cSim.pOldPosq[atomID];
float4 xpi = cSim.pPosq[atomID];
xpi.x += apos.x;
xpi.y += apos.y;
xpi.z += apos.z;
cSim.pPosq[atomID] = xpi;
pos += blockDim.x * gridDim.x;
}
}
void kApplySecondShake(gpuContext gpu)
{
// printf("kApplySecondShake\n");
if (gpu->sim.ShakeConstraints > 0)
{
kApplySecondShake_kernel<<<gpu->sim.blocks, gpu->sim.shake_threads_per_block>>>();
LAUNCHERROR("kApplySecondShake");
}
// handle non-Shake atoms
if (gpu->sim.NonShakeConstraints > 0)
{
kApplyNoShake_kernel<<<gpu->sim.blocks, gpu->sim.nonshake_threads_per_block>>>();
LAUNCHERROR("kApplyNoShake");
}
}
......@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static __constant__ cudaGmxSimulation cSim;
void SetVerletUpdateSim(gpuContext gpu)
......@@ -53,102 +51,11 @@ void GetVerletUpdateSim(gpuContext gpu)
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kVerletUpdatePart1_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x;
velocity.y += dtOverMass*force.y;
velocity.z += dtOverMass*force.z;
#ifndef DeltaShake
apos.x += velocity.x*cSim.deltaT;
apos.y += velocity.y*cSim.deltaT;
apos.z += velocity.z*cSim.deltaT;
#else
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kVerletUpdatePart1CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x-sCM[0].x;
velocity.y += dtOverMass*force.y-sCM[0].y;
velocity.z += dtOverMass*force.z-sCM[0].z;
// Include versions of the kernels with and with center of mass motion removal.
#ifndef DeltaShake
apos.x += velocity.x*cSim.deltaT;
apos.y += velocity.y*cSim.deltaT;
apos.z += velocity.z*cSim.deltaT;
#else
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
#endif
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#include "kVerletUpdate.h"
#define REMOVE_CM
#include "kVerletUpdate.h"
void kVerletUpdatePart1(gpuContext gpu)
{
......@@ -166,105 +73,6 @@ void kVerletUpdatePart1(gpuContext gpu)
}
}
__global__ void kVerletUpdatePart2_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kVerletUpdatePart2CM_kernel()
{
extern __shared__ float3 sCM[];
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
float3 CM = {0.0f, 0.0f, 0.0f};
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
float mass = 1.0f / velocity.w;
#ifndef DeltaShake
velocity.x = cSim.oneOverDeltaT*(xPrime.x-apos.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y-apos.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z-apos.z);
#else
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#endif
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
}
void kVerletUpdatePart2(gpuContext gpu)
{
// printf("kVerletUpdatePart2\n");
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Verlet integration. It is included
* several times in kVerletUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__ void kVerletUpdatePart1CM_kernel()
#else
__global__ void kVerletUpdatePart1_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f};
float4 CM1 = { 0.0f, 0.0f, 0.0f, 0.0f };
// Read CM outputs from previous step
unsigned int cpos = threadIdx.x;
while (cpos < gridDim.x)
{
CM1 = cSim.pLinearMomentum[cpos];
CM.x += CM1.x;
CM.y += CM1.y;
CM.z += CM1.z;
cpos += blockDim.x;
}
sCM[threadIdx.x].x = CM.x;
sCM[threadIdx.x].y = CM.y;
sCM[threadIdx.x].z = CM.z;
__syncthreads();
// Reduce CM
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
#endif
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x;
velocity.y += dtOverMass*force.y;
velocity.z += dtOverMass*force.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
}
#ifdef REMOVE_CM
__global__ void kVerletUpdatePart2CM_kernel()
#else
__global__ void kVerletUpdatePart2_kernel()
#endif
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
#endif
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
xPrime.z += apos.z;
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
CM.y += mass * velocity.y;
CM.z += mass * velocity.z;
#endif
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
#ifdef REMOVE_CM
// Scale CM
CM.x *= cSim.inverseTotalMass;
CM.y *= cSim.inverseTotalMass;
CM.z *= cSim.inverseTotalMass;
sCM[threadIdx.x] = CM;
__syncthreads();
// Reduce CM for CTA
unsigned int offset = 1;
unsigned int mask = 1;
while (offset < blockDim.x)
{
if (((threadIdx.x & mask) == 0) && (threadIdx.x + offset < blockDim.x))
{
sCM[threadIdx.x].x += sCM[threadIdx.x + offset].x;
sCM[threadIdx.x].y += sCM[threadIdx.x + offset].y;
sCM[threadIdx.x].z += sCM[threadIdx.x + offset].z;
}
mask = 2 * mask + 1;
offset *= 2;
__syncthreads();
}
if (threadIdx.x == 0)
{
float4 CM;
CM.x = sCM[0].x;
CM.y = sCM[0].y;
CM.z = sCM[0].z;
CM.w = 0.0f;
cSim.pLinearMomentum[blockIdx.x] = CM;
}
#endif
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment