Unverified Commit fd7c5465 authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Initial Nose Hoover common implementation

parent fd263401
......@@ -945,6 +945,116 @@ private:
ComputeKernel kernel1, kernel2, kernel3, kernel4;
};
/**
 * Implementation of IntegrateVelocityVerletStepKernel built on a ComputeContext.
 * This kernel is invoked by NoseHooverIntegrator to take one time step.
 */
class CommonIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
/**
 * Create the kernel object. No device resources are created here; only the
 * context reference is stored and hasInitializedKernels is cleared.
 *
 * @param name       the kernel name, forwarded to the base class
 * @param platform   the Platform, forwarded to the base class
 * @param cc         the ComputeContext, stored by reference (it must outlive this object)
 */
CommonIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, ComputeContext& cc) :
IntegrateVelocityVerletStepKernel(name, platform), cc(cc), hasInitializedKernels(false) { }
~CommonIntegrateVelocityVerletStepKernel() {}
/**
 * Initialize the kernel.
 *
 * @param system     the System this kernel will be applied to
 * @param integrator the NoseHooverIntegrator this kernel will be used for
 */
void initialize(const System& system, const NoseHooverIntegrator& integrator);
/**
 * Execute the kernel.
 *
 * @param context        the context in which to execute this kernel
 * @param integrator     the NoseHooverIntegrator this kernel is being used for
 * @param forcesAreValid a reference to the parent integrator's boolean for keeping
 *                       track of the validity of the current forces.
 */
void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
/**
 * Compute the kinetic energy.
 *
 * @param context    the context in which to execute this kernel
 * @param integrator the NoseHooverIntegrator this kernel is being used for
 * @return the kinetic energy
 */
double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
private:
// The compute context this kernel runs in (reference, not owned).
ComputeContext& cc;
// NOTE: not set by the constructor; presumably assigned during initialize()/execute() -- confirm in the implementation.
float prevMaxPairDistance;
// Device arrays used by the integration kernels.
ComputeArray maxPairDistanceBuffer, pairListBuffer, atomListBuffer, pairTemperatureBuffer;
// Presumably the three velocity Verlet stages plus the hard wall constraint kernel -- confirm in the implementation.
ComputeKernel kernel1, kernel2, kernel3, kernelHardWall;
// False until the ComputeKernel members above have been created.
bool hasInitializedKernels;
};
/**
 * This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
 * and update the associated particle velocities.
 */
class CommonNoseHooverChainKernel : public NoseHooverChainKernel {
public:
/**
 * Create the kernel object. Only the context reference is stored and the
 * "has initialized" flags are cleared; no device resources are created here.
 *
 * @param name       the kernel name, forwarded to the base class
 * @param platform   the Platform, forwarded to the base class
 * @param cc         the ComputeContext, stored by reference (it must outlive this object)
 */
CommonNoseHooverChainKernel(std::string name, const Platform& platform, ComputeContext& cc) :
NoseHooverChainKernel(name, platform), cc(cc), hasInitializedPropagateKernel(false),
hasInitializedKineticEnergyKernel(false), hasInitializedHeatBathEnergyKernel(false),
hasInitializedScaleVelocitiesKernel(false) {}
~CommonNoseHooverChainKernel() {}
/**
 * Initialize the kernel.
 */
void initialize();
/**
 * Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
 *
 * @param context          the context in which to execute this kernel
 * @param nhc              the object describing the chain to be propagated.
 * @param kineticEnergies  the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
 * @param timeStep         the time step used by the integrator.
 * @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
 */
std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
 * Execute the kernel that computes the total (kinetic + potential) heat bath energy.
 *
 * @param context   the context in which to execute this kernel
 * @param nhc       the chain whose energy is to be determined.
 * @return the total heat bath energy.
 */
double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
 * Execute the kernel that computes the kinetic energy for a subset of atoms,
 * or the relative kinetic energy of Drude particles with respect to their parent atoms
 *
 * @param context          the context in which to execute this kernel
 * @param noseHooverChain  the chain whose energy is to be determined.
 * @param downloadValue    whether the computed value should be downloaded and returned.
 * @return a pair of kinetic energies; presumably {absolute, relative}, and presumably only
 *         meaningful when downloadValue is true -- confirm in the implementation.
 */
std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
 * Execute the kernel that scales the velocities of particles associated with a nose hoover chain
 *
 * @param context          the context in which to execute this kernel
 * @param noseHooverChain  the chain whose particles are to be scaled.
 * @param scaleFactors     the {absolute, relative} multiplicative factor by which velocities are scaled.
 */
void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
// NOTE: not set by the constructor; presumably assigned when the reduction kernel is built -- confirm.
int sumWorkGroupSize;
// The compute context this kernel runs in (reference, not owned).
ComputeContext& cc;
// Device arrays shared by the thermostat kernels.
ComputeArray energyBuffer, scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
// Device arrays and kernels keyed by an integer; presumably a chain identifier -- confirm in the implementation.
std::map<int, ComputeArray> atomlists, pairlists;
std::map<int, ComputeKernel> propagateKernels;
// Flags recording which groups of kernels below have been created.
bool hasInitializedPropagateKernel;
bool hasInitializedKineticEnergyKernel;
bool hasInitializedHeatBathEnergyKernel;
bool hasInitializedScaleVelocitiesKernel;
ComputeKernel reduceEnergyKernel;
ComputeKernel computeHeatBathEnergyKernel;
ComputeKernel computeAtomsKineticEnergyKernel;
ComputeKernel computePairsKineticEnergyKernel;
ComputeKernel scaleAtomsVelocitiesKernel;
ComputeKernel scalePairsVelocitiesKernel;
};
/**
* This kernel is invoked by BrownianIntegrator to take one time step.
*/
......
This diff is collapsed.
/**
 * Propagate one Nose-Hoover chain over a time step and produce the factor by
 * which the thermostated velocities must be scaled.
 *
 * chainData[bead].x holds the thermostat position of a bead and .y its velocity.
 * energySum[0] holds a pair of kinetic energies; chainType selects which one
 * drives this chain (0 -> .x, anything else -> .y), and the same selection
 * decides which component of scaleFactor[0] receives the result.
 * chainMasses and chainForces are scratch arrays of at least chainLength entries
 * that are fully rewritten here.
 *
 * The body is serial: every work item that runs it performs the same updates on
 * the same global memory, so it is presumably meant to be launched with a single
 * work item -- confirm at the call site.
 */
KERNEL void propagateNoseHooverChain(GLOBAL mixed2* RESTRICT chainData, GLOBAL const mixed2 * RESTRICT energySum, GLOBAL mixed2* RESTRICT scaleFactor,
        GLOBAL mixed* RESTRICT chainMasses, GLOBAL mixed* RESTRICT chainForces, int chainType, int chainLength, int numMTS,
        int numDOFs, float timeStep, mixed kT, float frequency){
    const mixed kineticEnergy = chainType == 0 ? energySum[0].x : energySum[0].y;
    mixed scale = 1;
    if (kineticEnergy < 1e-8) {
        // Nothing to thermostat. Still publish the identity scale factor so that a
        // later velocity-scaling kernel does not pick up a stale value left in the
        // buffer by a previous step; the chain itself is left untouched.
        if (chainType == 0) {
            scaleFactor[0].x = scale;
        } else {
            scaleFactor[0].y = scale;
        }
        return;
    }
    // Every bead has mass kT/frequency^2; the first bead is additionally weighted by
    // the number of degrees of freedom it is coupled to.
    for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
    chainMasses[0] *= numDOFs;
    mixed KE2 = 2.0f * kineticEnergy;
    mixed timeOverMTS = timeStep / numMTS;
    // Initial forces: the first bead is driven by the particles' kinetic energy,
    // each later bead by the kinetic energy of the bead before it.
    chainForces[0] = (KE2 - numDOFs * kT) / chainMasses[0];
    for (int bead = 0; bead < chainLength - 1; ++bead) {
        chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
    }
    for (int mts = 0; mts < numMTS; ++mts) {
        // BEGIN_YS_LOOP / END_YS_LOOP are defined outside this kernel; they must
        // provide the weight `ys` used below.
        BEGIN_YS_LOOP
            mixed wdt = ys * timeOverMTS;
            // First half: update bead velocities from the end of the chain inwards.
            chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
            for (int bead = chainLength - 2; bead >= 0; --bead) {
                mixed aa = EXP(-0.125f * wdt * chainData[bead + 1].y);
                chainData[bead].y = aa * (chainData[bead].y * aa + 0.25f * wdt * chainForces[bead]);
            }
            // update particle velocities
            mixed aa = EXP(-0.5f * wdt * chainData[0].y);
            scale *= aa;
            // update the thermostat positions
            for (int bead = 0; bead < chainLength; ++bead) {
                chainData[bead].x += 0.5f * chainData[bead].y * wdt;
            }
            // update the forces
            chainForces[0] = (scale * scale * KE2 - numDOFs * kT) / chainMasses[0];
            // update thermostat velocities
            for (int bead = 0; bead < chainLength - 1; ++bead) {
                mixed aa = EXP(-0.125f * wdt * chainData[bead + 1].y);
                chainData[bead].y = aa * (aa * chainData[bead].y + 0.25f * wdt * chainForces[bead]);
                chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
            }
            chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
        END_YS_LOOP
    } // MTS loop
    if (chainType == 0) {
        scaleFactor[0].x = scale;
    } else {
        scaleFactor[0].y = scale;
    }
}
/**
 * Add the total (kinetic + potential) energy of every Nose-Hoover bead of one
 * chain to heatBathEnergy[0].
 *
 * The result is accumulated, never assigned: the caller has to zero
 * heatBathEnergy[0] before the first chain is processed.
 */
KERNEL void computeHeatBathEnergy(GLOBAL mixed* RESTRICT heatBathEnergy, int chainLength, int numDOFs,
        mixed kT, float frequency, GLOBAL const mixed2* RESTRICT chainData){
    for (int bead = 0; bead < chainLength; ++bead) {
        // Only the first bead is weighted by the number of degrees of freedom.
        const mixed dofWeight = (bead == 0) ? numDOFs : 1;
        const mixed beadMass = dofWeight * kT / (frequency * frequency);
        // Kinetic term, from the bead velocity stored in .y
        const mixed beadVelocity = chainData[bead].y;
        heatBathEnergy[0] += 0.5f * beadMass * beadVelocity * beadVelocity;
        // Potential term, from the bead position stored in .x
        const mixed beadPosition = chainData[bead].x;
        heatBathEnergy[0] += dofWeight * kT * beadPosition;
    }
}
/**
 * Compute per-work-item partial kinetic energies for a list of atoms.
 *
 * Each work item strides over atoms[0..numAtoms) and stores its partial sum in
 * energyBuffer[GLOBAL_ID].x; the .y component is set to zero. velm[i].w is used
 * as the inverse mass, with 0 meaning the atom contributes nothing.
 */
KERNEL void computeAtomsKineticEnergy(GLOBAL mixed2 * RESTRICT energyBuffer, int numAtoms,
        GLOBAL const mixed4* RESTRICT velm, GLOBAL const int *RESTRICT atoms){
    mixed kinetic = 0;
    for (int slot = GLOBAL_ID; slot < numAtoms; slot += GLOBAL_SIZE) {
        const mixed4 vel = velm[atoms[slot]];
        const mixed atomMass = vel.w == 0 ? 0 : 1 / vel.w;
        kinetic += 0.5f * atomMass * (vel.x*vel.x + vel.y*vel.y + vel.z*vel.z);
    }
    // Assigned, not accumulated: this overwrites whatever the buffer held before.
    energyBuffer[GLOBAL_ID] = make_mixed2(kinetic, 0);
}
/**
 * Add per-work-item partial kinetic energies for a list of atom pairs to
 * energyBuffer[GLOBAL_ID].
 *
 * For each pair the motion is split into the centre-of-mass motion (accumulated
 * into .x) and the relative motion of the two atoms (accumulated into .y).
 * velm[i].w is used as the inverse mass, with 0 meaning a massless atom.
 *
 * A pair whose two atoms are both massless has no kinetic energy and is skipped;
 * without that guard the division by the (zero) total mass would produce NaN and
 * poison the whole energy sum.
 */
KERNEL void computePairsKineticEnergy(GLOBAL mixed2 * RESTRICT energyBuffer, int numPairs,
        GLOBAL const mixed4* RESTRICT velm, GLOBAL const int2 *RESTRICT pairs){
    mixed2 energy = make_mixed2(0,0);
    for (int index = GLOBAL_ID; index < numPairs; index += GLOBAL_SIZE) {
        const int2 pair = pairs[index];
        const mixed4 v1 = velm[pair.x];
        const mixed4 v2 = velm[pair.y];
        const mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
        const mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
        const mixed totalMass = m1 + m2;
        if (totalMass == 0)
            continue;
        // Centre-of-mass velocity of the pair.
        mixed4 cv;
        cv.x = (m1*v1.x + m2*v2.x) / totalMass;
        cv.y = (m1*v1.y + m2*v2.y) / totalMass;
        cv.z = (m1*v1.z + m2*v2.z) / totalMass;
        // Velocity of atom 2 relative to atom 1.
        mixed4 rv;
        rv.x = v2.x - v1.x;
        rv.y = v2.y - v1.y;
        rv.z = v2.z - v1.z;
        energy.x += 0.5f * totalMass * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
        // The relative motion carries the reduced mass m1*m2/(m1+m2).
        energy.y += 0.5f * (m1 * m2 / totalMass) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
    }
    // The atoms version of this has been called already, so accumulate instead of assigning here
    energyBuffer[GLOBAL_ID].x += energy.x;
    energyBuffer[GLOBAL_ID].y += energy.y;
}
/**
 * Multiply the velocity of every listed atom by scaleFactor[0].x.
 *
 * Only the x, y and z components of velm are scaled; the w component is
 * carried over unchanged.
 */
KERNEL void scaleAtomsVelocities(GLOBAL mixed2* RESTRICT scaleFactor, int numAtoms,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const int *RESTRICT atoms){
    const mixed factor = scaleFactor[0].x;
    for (int slot = GLOBAL_ID; slot < numAtoms; slot += GLOBAL_SIZE) {
        const int target = atoms[slot];
        mixed4 vel = velm[target];
        vel.x *= factor;
        vel.y *= factor;
        vel.z *= factor;
        velm[target] = vel;
    }
}
/**
 * Scale the velocities of every listed atom pair.
 *
 * The pair's motion is split into its centre-of-mass velocity, which is scaled
 * by scaleFactor[0].x, and the velocity of atom 2 relative to atom 1, which is
 * scaled by scaleFactor[0].y; the two atoms' velocities are then rebuilt from
 * the scaled parts. velm[i].w is used as the inverse mass (0 means massless) and
 * is never modified.
 *
 * A pair whose two atoms are both massless is left untouched; without that guard
 * the division by the (zero) total mass would write NaN velocities.
 */
KERNEL void scalePairsVelocities(GLOBAL mixed2 * RESTRICT scaleFactor, int numPairs,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const int2 *RESTRICT pairs){
    const mixed comScale = scaleFactor[0].x;
    const mixed relScale = scaleFactor[0].y;
    for (int index = GLOBAL_ID; index < numPairs; index += GLOBAL_SIZE) {
        const int atom1 = pairs[index].x;
        const int atom2 = pairs[index].y;
        const mixed4 v1 = velm[atom1];
        const mixed4 v2 = velm[atom2];
        const mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
        const mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
        const mixed totalMass = m1 + m2;
        if (totalMass == 0)
            continue;
        // Centre-of-mass velocity of the pair.
        mixed4 cv;
        cv.x = (m1*v1.x + m2*v2.x) / totalMass;
        cv.y = (m1*v1.y + m2*v2.y) / totalMass;
        cv.z = (m1*v1.z + m2*v2.z) / totalMass;
        // Velocity of atom 2 relative to atom 1.
        mixed4 rv;
        rv.x = v2.x - v1.x;
        rv.y = v2.y - v1.y;
        rv.z = v2.z - v1.z;
        // Rebuild both velocities from the independently scaled parts.
        velm[atom1].x = comScale * cv.x - relScale * rv.x * m2 / totalMass;
        velm[atom1].y = comScale * cv.y - relScale * rv.y * m2 / totalMass;
        velm[atom1].z = comScale * cv.z - relScale * rv.z * m2 / totalMass;
        velm[atom2].x = comScale * cv.x + relScale * rv.x * m1 / totalMass;
        velm[atom2].y = comScale * cv.y + relScale * rv.y * m1 / totalMass;
        velm[atom2].z = comScale * cv.z + relScale * rv.z * m1 / totalMass;
    }
}
/**
 * Sum a buffer of energy pairs (stored as mixed2) into a single mixed2.
 * This follows the analogous reduction used by the custom integrator code.
 *
 * Each work item first sums a strided slice of sumBuffer, then the per-item
 * partial sums are combined pairwise in tempBuffer-style local storage and
 * work item 0 writes the total to *result.
 */
KERNEL void reduceEnergyPair(GLOBAL const mixed2* RESTRICT sumBuffer, GLOBAL mixed2* result, int bufferSize) {
    LOCAL mixed2 partialSums[WORK_GROUP_SIZE];
    const unsigned int lane = LOCAL_ID;
    // Per-work-item partial sum over a strided slice of the input.
    mixed2 accum = make_mixed2(0,0);
    for (unsigned int entry = lane; entry < bufferSize; entry += LOCAL_SIZE) {
        const mixed2 value = sumBuffer[entry];
        accum.x += value.x;
        accum.y += value.y;
    }
    partialSums[lane].x = accum.x;
    partialSums[lane].y = accum.y;
    // Pairwise combination: at each level, every (2*step)-th entry absorbs the
    // entry `step` places after it.
    for (int step = 1; step < WORK_GROUP_SIZE; step *= 2) {
        SYNC_THREADS;
        if (lane%(step*2) == 0 && lane+step < WORK_GROUP_SIZE) {
            partialSums[lane].x += partialSums[lane+step].x;
            partialSums[lane].y += partialSums[lane+step].y;
        }
    }
    if (lane == 0)
        *result = partialSums[0];
}
/**
 * Perform the first step of Velocity Verlet integration.
 *
 * Velocities are advanced using the current forces and the resulting
 * displacement (velocity * dt) is stored in posDelta; positions themselves are
 * not modified here. Atoms in atomList are handled individually, atoms in
 * pairList are handled as pairs via their centre-of-mass and relative motion.
 * velm[i].w is used as the inverse mass; atoms with w == 0 are never written.
 * Force component c of atom i is read from force[i + c*paddedNumAtoms].
 */
KERNEL void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedNumAtoms, GLOBAL const mixed2* RESTRICT dt, GLOBAL const real4* RESTRICT posq,
GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta,
GLOBAL const int* RESTRICT atomList, GLOBAL const int2* RESTRICT pairList
#ifdef USE_MIXED_PRECISION
,GLOBAL const real4* RESTRICT posqCorrection
#endif
){
const mixed2 stepSize = dt[0];
// The position update uses stepSize.y; the velocity update uses the mean of .x and .y.
const mixed dtPos = stepSize.y;
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
// Half of dtVel, divided by 2^32 to rescale the integer-valued forces
// (presumably fixed point with 32 fractional bits -- confirm).
const mixed scale = 0.5f * dtVel/(mixed) 0x100000000;
// ---- individually integrated atoms ----
int index = GLOBAL_ID;
while (index < numAtoms) {
int atom = atomList[index];
mixed4 velocity = velm[atom];
if (velocity.w != 0.0) {
// The position is loaded only so that its w component is carried into posDelta;
// x, y and z are overwritten with the displacement below.
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[atom];
real4 pos2 = posqCorrection[atom];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[atom];
#endif
velocity.x += scale*force[atom]*velocity.w;
velocity.y += scale*force[atom+paddedNumAtoms]*velocity.w;
velocity.z += scale*force[atom+paddedNumAtoms*2]*velocity.w;
pos.x = velocity.x*dtPos;
pos.y = velocity.y*dtPos;
pos.z = velocity.z*dtPos;
posDelta[atom] = pos;
velm[atom] = velocity;
}
index += GLOBAL_SIZE;
}
// ---- atom pairs, integrated in centre-of-mass / relative coordinates ----
index = GLOBAL_ID;
while (index < numPairs){
int atom1 = pairList[index].x;
int atom2 = pairList[index].y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
// NOTE: these two fractions are not guarded against m1 + m2 == 0; in that case
// neither atom is written below because both w values are zero.
mixed mass1fract = m1 / (m1 + m2);
mixed mass2fract = m2 / (m1 + m2);
// Inverse reduced and inverse total mass, forced to 0 when undefined.
mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
mixed3 comVel;
comVel.x= v1.x*mass1fract + v2.x*mass2fract;
comVel.y= v1.y*mass1fract + v2.y*mass2fract;
comVel.z= v1.z*mass1fract + v2.z*mass2fract;
mixed3 relVel;
relVel.x= v2.x - v1.x;
relVel.y= v2.y - v1.y;
relVel.z= v2.z - v1.z;
mixed3 comFrc;
// Forces already multiplied by the half time step.
mixed F1x = scale*force[atom1];
mixed F1y = scale*force[atom1+paddedNumAtoms];
mixed F1z = scale*force[atom1+paddedNumAtoms*2];
mixed F2x = scale*force[atom2];
mixed F2y = scale*force[atom2+paddedNumAtoms];
mixed F2z = scale*force[atom2+paddedNumAtoms*2];
comFrc.x = F1x + F2x;
comFrc.y = F1y + F2y;
comFrc.z = F1z + F2z;
mixed3 relFrc;
relFrc.x = mass1fract*F2x - mass2fract*F1x;
relFrc.y = mass1fract*F2y - mass2fract*F1y;
relFrc.z = mass1fract*F2z - mass2fract*F1z;
// Kick the centre-of-mass and relative velocities separately.
comVel.x += comFrc.x * invTotMass;
comVel.y += comFrc.y * invTotMass;
comVel.z += comFrc.z * invTotMass;
relVel.x += relFrc.x * invRedMass;
relVel.y += relFrc.y * invRedMass;
relVel.z += relFrc.z * invRedMass;
// As above, positions are loaded only to carry their w component into posDelta.
#ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom1];
real4 posv2 = posq[atom2];
real4 posc1 = posqCorrection[atom1];
real4 posc2 = posqCorrection[atom2];
mixed4 pos1 = make_mixed4(posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
mixed4 pos2 = make_mixed4(posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
#else
real4 pos1 = posq[atom1];
real4 pos2 = posq[atom2];
#endif
// Rebuild each atom's velocity from the kicked parts; massless atoms are left alone.
if (v1.w != 0.0f) {
v1.x = comVel.x - relVel.x*mass2fract;
v1.y = comVel.y - relVel.y*mass2fract;
v1.z = comVel.z - relVel.z*mass2fract;
pos1.x = v1.x*dtPos;
pos1.y = v1.y*dtPos;
pos1.z = v1.z*dtPos;
posDelta[atom1] = pos1;
velm[atom1] = v1;
}
if (v2.w != 0.0f) {
v2.x = comVel.x + relVel.x*mass1fract;
v2.y = comVel.y + relVel.y*mass1fract;
v2.z = comVel.z + relVel.z*mass1fract;
pos2.x = v2.x*dtPos;
pos2.y = v2.y*dtPos;
pos2.z = v2.z*dtPos;
posDelta[atom2] = pos2;
velm[atom2] = v2;
}
index += GLOBAL_SIZE;
}
}
/**
 * Perform the second step of Velocity Verlet integration.
 *
 * Adds the displacement stored in posDelta to the position of every atom whose
 * velm w component is nonzero. In mixed precision builds the position is
 * assembled from posq plus posqCorrection and split back into both afterwards.
 * Work item 0 additionally copies dt[0].y into dt[0].x.
 */
KERNEL void integrateVelocityVerletPart2(int numAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm,
        GLOBAL const mixed4* RESTRICT posDelta
#ifdef USE_MIXED_PRECISION
        ,GLOBAL real4* RESTRICT posqCorrection
#endif
        ){
    const mixed2 stepSize = dt[0];
    if (GLOBAL_ID == 0)
        dt[0].x = stepSize.y;
    for (int atom = GLOBAL_ID; atom < numAtoms; atom += GLOBAL_SIZE) {
        const mixed4 velocity = velm[atom];
        if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
            const real4 coarse = posq[atom];
            const real4 residual = posqCorrection[atom];
            mixed4 pos = make_mixed4(coarse.x+(mixed)residual.x, coarse.y+(mixed)residual.y, coarse.z+(mixed)residual.z, coarse.w);
#else
            real4 pos = posq[atom];
#endif
            const mixed4 shift = posDelta[atom];
            pos.x += shift.x;
            pos.y += shift.y;
            pos.z += shift.z;
#ifdef USE_MIXED_PRECISION
            // Store the rounded position and keep the rounding remainder separately.
            posq[atom] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
            posqCorrection[atom] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
            posq[atom] = pos;
#endif
        }
    }
}
/**
 * Perform the third step of Velocity Verlet integration.
 *
 * Velocities are advanced using the current forces and additionally
 * corrected by (posDelta - velocity*dt)/dt, i.e. by the difference between the
 * displacement stored in posDelta and the displacement the velocity alone would
 * give. Atoms in atomList are handled individually, atoms in pairList as pairs
 * via their centre-of-mass and relative motion. velm[i].w is used as the
 * inverse mass; atoms with w == 0 are never written.
 * Work item 0 additionally copies dt[0].y into dt[0].x.
 *
 * 1/dt is computed in double precision when the device supports it. Otherwise a
 * single precision reciprocal is used together with a first-order correction
 * term that compensates for its rounding error. (The two cases were previously
 * swapped, which used `double` exactly on the devices that lack it.)
 */
KERNEL void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL const mixed4* RESTRICT posDelta,
        GLOBAL const int* RESTRICT atomList, GLOBAL const int2* RESTRICT pairList
#ifdef USE_MIXED_PRECISION
        ,GLOBAL const real4* RESTRICT posqCorrection
#endif
        ){
    mixed2 stepSize = dt[0];
#ifdef SUPPORTS_DOUBLE_PRECISION
    double oneOverDt = 1.0/stepSize.y;
#else
    float oneOverDt = 1.0f/stepSize.y;
    float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
    const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
    // Half of dtVel, divided by 2^32 to rescale the integer-valued forces.
    const mixed scale = 0.5f*dtVel/(mixed) 0x100000000;
    int index = GLOBAL_ID;
    if (index == 0)
        dt[0].x = stepSize.y;
    // ---- individually integrated atoms ----
    while(index < numAtoms) {
        int atom = atomList[index];
        mixed4 velocity = velm[atom];
        if (velocity.w != 0.0) {
            mixed4 deltaXconstrained = posDelta[atom];
            velocity.x += scale*force[atom]*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
            velocity.y += scale*force[atom+paddedNumAtoms]*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
            velocity.z += scale*force[atom+paddedNumAtoms*2]*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
            velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
            velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif
            velm[atom] = velocity;
        }
        index += GLOBAL_SIZE;
    }
    // ---- atom pairs, integrated in centre-of-mass / relative coordinates ----
    index = GLOBAL_ID;
    while(index < numPairs) {
        int atom1 = pairList[index].x;
        int atom2 = pairList[index].y;
        mixed4 v1 = velm[atom1];
        mixed4 v2 = velm[atom2];
        mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
        mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
        // Not guarded against m1 + m2 == 0; in that case neither atom is written
        // below because both w values are zero.
        mixed mass1fract = m1 / (m1 + m2);
        mixed mass2fract = m2 / (m1 + m2);
        // Inverse reduced and inverse total mass, forced to 0 when undefined.
        mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
        mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
        mixed3 comVel;
        comVel.x= v1.x*mass1fract + v2.x*mass2fract;
        comVel.y= v1.y*mass1fract + v2.y*mass2fract;
        comVel.z= v1.z*mass1fract + v2.z*mass2fract;
        mixed3 relVel;
        relVel.x= v2.x - v1.x;
        relVel.y= v2.y - v1.y;
        relVel.z= v2.z - v1.z;
        mixed3 comFrc;
        // Forces already multiplied by the half time step.
        mixed F1x = scale*force[atom1];
        mixed F1y = scale*force[atom1+paddedNumAtoms];
        mixed F1z = scale*force[atom1+paddedNumAtoms*2];
        mixed F2x = scale*force[atom2];
        mixed F2y = scale*force[atom2+paddedNumAtoms];
        mixed F2z = scale*force[atom2+paddedNumAtoms*2];
        comFrc.x = F1x + F2x;
        comFrc.y = F1y + F2y;
        comFrc.z = F1z + F2z;
        mixed3 relFrc;
        relFrc.x = mass1fract*F2x - mass2fract*F1x;
        relFrc.y = mass1fract*F2y - mass2fract*F1y;
        relFrc.z = mass1fract*F2z - mass2fract*F1z;
        // Kick the centre-of-mass and relative velocities separately.
        comVel.x += comFrc.x * invTotMass;
        comVel.y += comFrc.y * invTotMass;
        comVel.z += comFrc.z * invTotMass;
        relVel.x += relFrc.x * invRedMass;
        relVel.y += relFrc.y * invRedMass;
        relVel.z += relFrc.z * invRedMass;
        // Rebuild each atom's velocity from the kicked parts and apply the
        // displacement-based correction; massless atoms are left alone.
        if (v1.w != 0.0f) {
            mixed4 deltaXconstrained = posDelta[atom1];
            v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt;
            v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt;
            v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction;
            v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction;
            v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction;
#endif
            velm[atom1] = v1;
        }
        if (v2.w != 0.0f) {
            mixed4 deltaXconstrained = posDelta[atom2];
            v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt;
            v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt;
            v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction;
            v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction;
            v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction;
#endif
            velm[atom2] = v2;
        }
        index += GLOBAL_SIZE;
    }
}
/**
 * Enforce a maximum separation ("hard wall") on every atom pair in pairList.
 *
 * If maxPairDistance[0] is not positive the kernel does nothing. Otherwise, for
 * each pair whose separation exceeds that distance, the positions are moved
 * back along the bond direction and the velocity components along the bond are
 * replaced by values derived from pairTemperature[index] and BOLTZ, pointing
 * the opposite way; velocity components perpendicular to the bond are kept.
 * velm[i].w is used as the inverse mass (0 means massless).
 */
KERNEL void integrateVelocityVerletHardWall(int numPairs, GLOBAL const float* RESTRICT maxPairDistance,
GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
GLOBAL mixed4* RESTRICT velm, GLOBAL const int2* RESTRICT pairList,
GLOBAL const float* RESTRICT pairTemperature
#ifdef USE_MIXED_PRECISION
,GLOBAL real4* RESTRICT posqCorrection
#endif
){
mixed dtPos = dt[0].y;
mixed maxDelta = (mixed) maxPairDistance[0];
// A non-positive maximum distance disables the hard wall entirely.
if (maxDelta > 0){
int index = GLOBAL_ID;
while(index < numPairs) {
// Velocity scale sqrt(T * BOLTZ); it is divided by sqrt(m1) where it is used below.
const mixed hardWallScale = sqrt( ((mixed) pairTemperature[index]) * ((mixed) BOLTZ));
int atom1 = pairList[index].x;
int atom2 = pairList[index].y;
#ifdef USE_MIXED_PRECISION
real4 posv1 = posq[atom1];
real4 posc1 = posqCorrection[atom1];
mixed4 pos1 = make_mixed4(posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
real4 posv2 = posq[atom2];
real4 posc2 = posqCorrection[atom2];
mixed4 pos2 = make_mixed4(posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
#else
real4 pos1 = posq[atom1];
real4 pos2 = posq[atom2];
#endif
// Vector from atom 2 to atom 1 and its length.
mixed3 delta = make_mixed3(pos1.x - pos2.x, pos1.y - pos2.y, pos1.z - pos2.z);
mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
mixed rInv = 1/r;
// Equivalent to r > maxDelta.
if (rInv*maxDelta < 1.0) {
// The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall.
mixed3 bondDir = make_mixed3(delta.x * rInv, delta.y * rInv, delta.z * rInv);
mixed3 vel1 = make_mixed3(velm[atom1].x, velm[atom1].y, velm[atom1].z);
mixed3 vel2 = make_mixed3(velm[atom2].x, velm[atom2].y, velm[atom2].z);
mixed m1 = velm[atom1].w != 0.0 ? 1.0/velm[atom1].w : 0.0;
mixed m2 = velm[atom2].w != 0.0 ? 1.0/velm[atom2].w : 0.0;
mixed invTotMass = (m1 + m2 != 0.0) ? 1.0 /(m1 + m2) : 0.0;
// Distance by which the pair overshoots the wall.
mixed deltaR = r-maxDelta;
mixed deltaT = dtPos;
// NOTE: this local shadows the kernel's `dt` pointer parameter for the rest of this block.
mixed dt = dtPos;
// Split atom 1's velocity into the part along the bond (vb1) and the rest (vp1).
mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
mixed3 vb1 = make_mixed3(bondDir.x*dotvr1, bondDir.y*dotvr1, bondDir.z*dotvr1);
mixed3 vp1 = make_mixed3(vel1.x-vb1.x, vel1.y-vb1.y, vel1.z-vb1.z);
if (m2 == 0) {
// The parent particle is massless, so move only the Drude particle.
// Time since the wall was crossed, capped at one position time step.
if (dotvr1 != 0.0)
deltaT = deltaR/fabs(dotvr1);
if (deltaT > dtPos)
deltaT = dtPos;
// New bond velocity: magnitude hardWallScale/sqrt(m1), sign opposite to the old one.
// NOTE: divides by fabs(dotvr1) and sqrt(m1) without guarding against zero.
dotvr1 = -dotvr1*hardWallScale/(fabs(dotvr1)*sqrt(m1));
mixed dr = -deltaR + deltaT*dotvr1;
pos1.x += bondDir.x*dr;
pos1.y += bondDir.y*dr;
pos1.z += bondDir.z*dr;
velm[atom1] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom1].w);
#ifdef USE_MIXED_PRECISION
posq[atom1] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posqCorrection[atom1] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
#else
posq[atom1] = pos1;
#endif
}
else {
// Move both particles.
mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
mixed3 vb2 = make_mixed3(bondDir.x*dotvr2, bondDir.y*dotvr2, bondDir.z*dotvr2);
mixed3 vp2 = make_mixed3(vel2.x-vb2.x, vel2.y-vb2.y, vel2.z-vb2.z);
// Work relative to the centre-of-mass velocity along the bond.
mixed vbCMass = (m1*dotvr1 + m2*dotvr2)*invTotMass;
dotvr1 -= vbCMass;
dotvr2 -= vbCMass;
// Time since the wall was crossed, capped at one position time step.
if (dotvr1 != dotvr2)
deltaT = deltaR/fabs(dotvr1-dotvr2);
if (deltaT > dt)
deltaT = dt;
// New bond velocities, mass-weighted and opposite in sign to the old ones.
// NOTE: divides by fabs(dotvr1), fabs(dotvr2) and sqrt(m1) without guarding against zero.
mixed vBond = hardWallScale/sqrt(m1);
dotvr1 = -dotvr1*vBond*m2*invTotMass/fabs(dotvr1);
dotvr2 = -dotvr2*vBond*m1*invTotMass/fabs(dotvr2);
mixed dr1 = -deltaR*m2*invTotMass + deltaT*dotvr1;
mixed dr2 = deltaR*m1*invTotMass + deltaT*dotvr2;
// Restore the centre-of-mass contribution.
dotvr1 += vbCMass;
dotvr2 += vbCMass;
pos1.x += bondDir.x*dr1;
pos1.y += bondDir.y*dr1;
pos1.z += bondDir.z*dr1;
pos2.x += bondDir.x*dr2;
pos2.y += bondDir.y*dr2;
pos2.z += bondDir.z*dr2;
velm[atom1] = make_mixed4(vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom1].w);
velm[atom2] = make_mixed4(vp2.x + bondDir.x*dotvr2, vp2.y + bondDir.y*dotvr2, vp2.z + bondDir.z*dotvr2, velm[atom2].w);
#ifdef USE_MIXED_PRECISION
posq[atom1] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posq[atom2] = make_real4((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
posqCorrection[atom1] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
posqCorrection[atom2] = make_real4(pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
#else
posq[atom1] = pos1;
posq[atom2] = pos2;
#endif
}
}
index += GLOBAL_SIZE;
}
}
}
......@@ -134,9 +134,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if (name == ApplyAndersenThermostatKernel::Name())
return new CommonApplyAndersenThermostatKernel(name, platform, cu);
if (name == NoseHooverChainKernel::Name())
return new CudaNoseHooverChainKernel(name, platform, cu);
return new CommonNoseHooverChainKernel(name, platform, cu);
if (name == IntegrateVelocityVerletStepKernel::Name())
return new CudaIntegrateVelocityVerletStepKernel(name, platform, cu);
return new CommonIntegrateVelocityVerletStepKernel(name, platform, cu);
if (name == ApplyMonteCarloBarostatKernel::Name())
return new CudaApplyMonteCarloBarostatKernel(name, platform, cu);
if (name == RemoveCMMotionKernel::Name())
......
......@@ -132,9 +132,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == ApplyAndersenThermostatKernel::Name())
return new CommonApplyAndersenThermostatKernel(name, platform, cl);
if (name == NoseHooverChainKernel::Name())
return new OpenCLNoseHooverChainKernel(name, platform, cl);
return new CommonNoseHooverChainKernel(name, platform, cl);
if (name == IntegrateVelocityVerletStepKernel::Name())
return new OpenCLIntegrateVelocityVerletStepKernel(name, platform, cl);
return new CommonIntegrateVelocityVerletStepKernel(name, platform, cl);
if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment