Commit b2222de3 authored by Peter Eastman's avatar Peter Eastman
Browse files

CUDA implementation of LocalCoordinatesSite depending on arbitrary particles

parent 80c223a8
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2014 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -158,8 +158,11 @@ private:
CudaArray* vsite3AvgWeights;
CudaArray* vsiteOutOfPlaneAtoms;
CudaArray* vsiteOutOfPlaneWeights;
CudaArray* vsiteLocalCoordsIndex;
CudaArray* vsiteLocalCoordsAtoms;
CudaArray* vsiteLocalCoordsParams;
CudaArray* vsiteLocalCoordsWeights;
CudaArray* vsiteLocalCoordsPos;
CudaArray* vsiteLocalCoordsStartIndex;
int randomPos;
int lastSeed, numVsites;
double2 lastStepSize;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -103,7 +103,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedMemory(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsAtoms(NULL), vsiteLocalCoordsParams(NULL) {
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsIndex(NULL), vsiteLocalCoordsAtoms(NULL),
vsiteLocalCoordsWeights(NULL), vsiteLocalCoordsPos(NULL), vsiteLocalCoordsStartIndex(NULL) {
// Create workspace arrays.
lastStepSize = make_double2(0.0, 0.0);
......@@ -454,8 +455,11 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vector<double4> vsite3AvgWeightVec;
vector<int4> vsiteOutOfPlaneAtomVec;
vector<double4> vsiteOutOfPlaneWeightVec;
vector<int4> vsiteLocalCoordsAtomVec;
vector<double> vsiteLocalCoordsParamVec;
vector<int> vsiteLocalCoordsIndexVec;
vector<int> vsiteLocalCoordsAtomVec;
vector<int> vsiteLocalCoordsStartVec;
vector<double> vsiteLocalCoordsWeightVec;
vector<double4> vsiteLocalCoordsPosVec;
for (int i = 0; i < numAtoms; i++) {
if (system.isVirtualSite(i)) {
if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
......@@ -480,64 +484,72 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeightVec.push_back(make_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
}
else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
// An out of plane site.
// A local coordinates site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
vsiteLocalCoordsAtomVec.push_back(make_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
Vec3 origin = site.getOriginWeights();
Vec3 x = site.getXWeights();
Vec3 y = site.getYWeights();
int numParticles = site.getNumParticles();
vector<double> origin, x, y;
site.getOriginWeights(origin);
site.getXWeights(x);
site.getYWeights(y);
vsiteLocalCoordsIndexVec.push_back(i);
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
for (int j = 0; j < numParticles; j++) {
vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
vsiteLocalCoordsWeightVec.push_back(origin[j]);
vsiteLocalCoordsWeightVec.push_back(x[j]);
vsiteLocalCoordsWeightVec.push_back(y[j]);
}
Vec3 pos = site.getLocalPosition();
vsiteLocalCoordsParamVec.push_back(origin[0]);
vsiteLocalCoordsParamVec.push_back(origin[1]);
vsiteLocalCoordsParamVec.push_back(origin[2]);
vsiteLocalCoordsParamVec.push_back(x[0]);
vsiteLocalCoordsParamVec.push_back(x[1]);
vsiteLocalCoordsParamVec.push_back(x[2]);
vsiteLocalCoordsParamVec.push_back(y[0]);
vsiteLocalCoordsParamVec.push_back(y[1]);
vsiteLocalCoordsParamVec.push_back(y[2]);
vsiteLocalCoordsParamVec.push_back(pos[0]);
vsiteLocalCoordsParamVec.push_back(pos[1]);
vsiteLocalCoordsParamVec.push_back(pos[2]);
vsiteLocalCoordsPosVec.push_back(make_double4(pos[0], pos[1], pos[2], 0.0));
}
}
}
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsAtomVec.size();
int numLocalCoords = vsiteLocalCoordsPosVec.size();
vsite2AvgAtoms = CudaArray::create<int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms = CudaArray::create<int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms = CudaArray::create<int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsAtoms = CudaArray::create<int4>(context, max(1, numLocalCoords), "vsiteLocalCoordinatesAtoms");
vsiteLocalCoordsIndex = CudaArray::create<int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms = CudaArray::create<int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex = CudaArray::create<int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0)
vsite2AvgAtoms->upload(vsite2AvgAtomVec);
if (num3Avg > 0)
vsite3AvgAtoms->upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms->upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0)
if (numLocalCoords > 0) {
vsiteLocalCoordsIndex->upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms->upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex->upload(vsiteLocalCoordsStartVec);
}
if (context.getUseDoublePrecision()) {
vsite2AvgWeights = CudaArray::create<double2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = CudaArray::create<double4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = CudaArray::create<double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = CudaArray::create<double>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams");
vsiteLocalCoordsWeights = CudaArray::create<double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = CudaArray::create<double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0)
vsite2AvgWeights->upload(vsite2AvgWeightVec);
if (num3Avg > 0)
vsite3AvgWeights->upload(vsite3AvgWeightVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights->upload(vsiteOutOfPlaneWeightVec);
if (numLocalCoords > 0)
vsiteLocalCoordsParams->upload(vsiteLocalCoordsParamVec);
if (numLocalCoords > 0) {
vsiteLocalCoordsWeights->upload(vsiteLocalCoordsWeightVec);
vsiteLocalCoordsPos->upload(vsiteLocalCoordsPosVec);
}
}
else {
vsite2AvgWeights = CudaArray::create<float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = CudaArray::create<float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = CudaArray::create<float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = CudaArray::create<float>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams");
vsiteLocalCoordsWeights = CudaArray::create<float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = CudaArray::create<float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) {
vector<float2> floatWeights(num2Avg);
for (int i = 0; i < num2Avg; i++)
......@@ -557,10 +569,14 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeights->upload(floatWeights);
}
if (numLocalCoords > 0) {
vector<float> floatParams(vsiteLocalCoordsParamVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsParamVec.size(); i++)
floatParams[i] = (float) vsiteLocalCoordsParamVec[i];
vsiteLocalCoordsParams->upload(floatParams);
vector<float> floatWeights(vsiteLocalCoordsWeightVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
floatWeights[i] = (float) vsiteLocalCoordsWeightVec[i];
vsiteLocalCoordsWeights->upload(floatWeights);
vector<float4> floatPos(vsiteLocalCoordsPosVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
floatPos[i] = make_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
vsiteLocalCoordsPos->upload(floatPos);
}
}
......@@ -644,10 +660,16 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete vsiteOutOfPlaneAtoms;
if (vsiteOutOfPlaneWeights != NULL)
delete vsiteOutOfPlaneWeights;
if (vsiteLocalCoordsIndex != NULL)
delete vsiteLocalCoordsIndex;
if (vsiteLocalCoordsAtoms != NULL)
delete vsiteLocalCoordsAtoms;
if (vsiteLocalCoordsParams != NULL)
delete vsiteLocalCoordsParams;
if (vsiteLocalCoordsWeights != NULL)
delete vsiteLocalCoordsWeights;
if (vsiteLocalCoordsPos != NULL)
delete vsiteLocalCoordsPos;
if (vsiteLocalCoordsStartIndex != NULL)
delete vsiteLocalCoordsStartIndex;
}
void CudaIntegrationUtilities::setNextStepSize(double size) {
......@@ -747,7 +769,9 @@ void CudaIntegrationUtilities::computeVirtualSites() {
void* args[] = {&context.getPosq().getDevicePointer(), &posCorrection, &vsite2AvgAtoms->getDevicePointer(), &vsite2AvgWeights->getDevicePointer(),
&vsite3AvgAtoms->getDevicePointer(), &vsite3AvgWeights->getDevicePointer(),
&vsiteOutOfPlaneAtoms->getDevicePointer(), &vsiteOutOfPlaneWeights->getDevicePointer(),
&vsiteLocalCoordsAtoms->getDevicePointer(), &vsiteLocalCoordsParams->getDevicePointer()};
&vsiteLocalCoordsIndex->getDevicePointer(), &vsiteLocalCoordsAtoms->getDevicePointer(),
&vsiteLocalCoordsWeights->getDevicePointer(), &vsiteLocalCoordsPos->getDevicePointer(),
&vsiteLocalCoordsStartIndex->getDevicePointer()};
context.executeKernel(vsitePositionKernel, args, numVsites);
}
}
......@@ -759,7 +783,9 @@ void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
&vsite2AvgAtoms->getDevicePointer(), &vsite2AvgWeights->getDevicePointer(),
&vsite3AvgAtoms->getDevicePointer(), &vsite3AvgWeights->getDevicePointer(),
&vsiteOutOfPlaneAtoms->getDevicePointer(), &vsiteOutOfPlaneWeights->getDevicePointer(),
&vsiteLocalCoordsAtoms->getDevicePointer(), &vsiteLocalCoordsParams->getDevicePointer()};
&vsiteLocalCoordsIndex->getDevicePointer(), &vsiteLocalCoordsAtoms->getDevicePointer(),
&vsiteLocalCoordsWeights->getDevicePointer(), &vsiteLocalCoordsPos->getDevicePointer(),
&vsiteLocalCoordsStartIndex->getDevicePointer()};
context.executeKernel(vsiteForceKernel, args, numVsites);
}
}
......
......@@ -680,7 +680,9 @@ extern "C" __global__ void updateCCMAAtomPositions(const int* __restrict__ numAt
extern "C" __global__ void computeVirtualSites(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const int4* __restrict__ avg2Atoms, const real2* __restrict__ avg2Weights,
const int4* __restrict__ avg3Atoms, const real4* __restrict__ avg3Weights,
const int4* __restrict__ outOfPlaneAtoms, const real4* __restrict__ outOfPlaneWeights,
const int4* __restrict__ localCoordsAtoms, const real* __restrict__ localCoordsParams) {
const int* __restrict__ localCoordsIndex, const int* __restrict__ localCoordsAtoms,
const real* __restrict__ localCoordsWeights, const real4* __restrict__ localCoordsPos,
const int* __restrict__ localCoordsStartIndex) {
// Two particle average sites.
......@@ -732,30 +734,31 @@ extern "C" __global__ void computeVirtualSites(real4* __restrict__ posq, real4*
// Local coordinates sites.
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_LOCAL_COORDS; index += blockDim.x*gridDim.x) {
int4 atoms = localCoordsAtoms[index];
const real* params = &localCoordsParams[12*index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w);
mixed3 pos1 = make_mixed3(pos1_4.x, pos1_4.y, pos1_4.z);
mixed3 pos2 = make_mixed3(pos2_4.x, pos2_4.y, pos2_4.z);
mixed3 pos3 = make_mixed3(pos3_4.x, pos3_4.y, pos3_4.z);
mixed3 originWeights = make_mixed3(params[0], params[1], params[2]);
mixed3 xWeights = make_mixed3(params[3], params[4], params[5]);
mixed3 yWeights = make_mixed3(params[6], params[7], params[8]);
mixed3 localPosition = make_mixed3(params[9], params[10], params[11]);
mixed3 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z;
mixed3 xdir = pos1*xWeights.x + pos2*xWeights.y + pos3*xWeights.z;
mixed3 ydir = pos1*yWeights.x + pos2*yWeights.y + pos3*yWeights.z;
int siteAtomIndex = localCoordsIndex[index];
int start = localCoordsStartIndex[index];
int end = localCoordsStartIndex[index+1];
mixed3 origin = make_mixed3(0), xdir = make_mixed3(0), ydir = make_mixed3(0);
for (int j = start; j < end; j++) {
mixed3 pos = trimTo3(loadPos(posq, posqCorrection, localCoordsAtoms[j]));
origin += pos*localCoordsWeights[3*j];
xdir += pos*localCoordsWeights[3*j+1];
ydir += pos*localCoordsWeights[3*j+2];
}
mixed3 zdir = cross(xdir, ydir);
xdir *= rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
zdir *= rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
xdir *= invNormXdir;
zdir *= invNormZdir;
ydir = cross(zdir, xdir);
real4 localPosition_4 = localCoordsPos[index];
mixed3 localPosition = make_mixed3(localPosition_4.x, localPosition_4.y, localPosition_4.z);
mixed4 pos = loadPos(posq, posqCorrection, siteAtomIndex);
pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z;
pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z;
pos.z = origin.z + xdir.z*localPosition.x + ydir.z*localPosition.y + zdir.z*localPosition.z;
storePos(posq, posqCorrection, atoms.x, pos);
storePos(posq, posqCorrection, siteAtomIndex, pos);
}
}
......@@ -778,7 +781,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
const int4* __restrict__ avg2Atoms, const real2* __restrict__ avg2Weights,
const int4* __restrict__ avg3Atoms, const real4* __restrict__ avg3Weights,
const int4* __restrict__ outOfPlaneAtoms, const real4* __restrict__ outOfPlaneWeights,
const int4* __restrict__ localCoordsAtoms, const real* __restrict__ localCoordsParams) {
const int* __restrict__ localCoordsIndex, const int* __restrict__ localCoordsAtoms,
const real* __restrict__ localCoordsWeights, const real4* __restrict__ localCoordsPos,
const int* __restrict__ localCoordsStartIndex) {
// Two particle average sites.
......@@ -826,87 +831,56 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
// Local coordinates sites.
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_LOCAL_COORDS; index += blockDim.x*gridDim.x) {
int4 atoms = localCoordsAtoms[index];
const real* params = &localCoordsParams[12*index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w);
mixed3 pos1 = make_mixed3(pos1_4.x, pos1_4.y, pos1_4.z);
mixed3 pos2 = make_mixed3(pos2_4.x, pos2_4.y, pos2_4.z);
mixed3 pos3 = make_mixed3(pos3_4.x, pos3_4.y, pos3_4.z);
mixed3 originWeights = make_mixed3(params[0], params[1], params[2]);
mixed3 wx = make_mixed3(params[3], params[4], params[5]);
mixed3 wy = make_mixed3(params[6], params[7], params[8]);
mixed3 localPosition = make_mixed3(params[9], params[10], params[11]);
mixed3 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z;
mixed3 xdir = pos1*wx.x + pos2*wx.y + pos3*wx.z;
mixed3 ydir = pos1*wy.x + pos2*wy.y + pos3*wy.z;
int siteAtomIndex = localCoordsIndex[index];
int start = localCoordsStartIndex[index];
int end = localCoordsStartIndex[index+1];
mixed3 origin = make_mixed3(0), xdir = make_mixed3(0), ydir = make_mixed3(0);
for (int j = start; j < end; j++) {
mixed3 pos = trimTo3(loadPos(posq, posqCorrection, localCoordsAtoms[j]));
origin += pos*localCoordsWeights[3*j];
xdir += pos*localCoordsWeights[3*j+1];
ydir += pos*localCoordsWeights[3*j+2];
}
mixed3 zdir = cross(xdir, ydir);
mixed invNormXdir = rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed invNormZdir = rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
mixed3 dx = xdir*invNormXdir;
mixed3 dz = zdir*invNormZdir;
mixed3 dy = cross(dz, dx);
real4 localPosition_4 = localCoordsPos[index];
mixed3 localPosition = make_mixed3(localPosition_4.x, localPosition_4.y, localPosition_4.z);
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
mixed t11 = (wx.x*ydir.x-wy.x*xdir.x)*invNormZdir;
mixed t12 = (wx.x*ydir.y-wy.x*xdir.y)*invNormZdir;
mixed t13 = (wx.x*ydir.z-wy.x*xdir.z)*invNormZdir;
mixed t21 = (wx.y*ydir.x-wy.y*xdir.x)*invNormZdir;
mixed t22 = (wx.y*ydir.y-wy.y*xdir.y)*invNormZdir;
mixed t23 = (wx.y*ydir.z-wy.y*xdir.z)*invNormZdir;
mixed t31 = (wx.z*ydir.x-wy.z*xdir.x)*invNormZdir;
mixed t32 = (wx.z*ydir.y-wy.z*xdir.y)*invNormZdir;
mixed t33 = (wx.z*ydir.z-wy.z*xdir.z)*invNormZdir;
mixed sx1 = t13*dz.y-t12*dz.z;
mixed sy1 = t11*dz.z-t13*dz.x;
mixed sz1 = t12*dz.x-t11*dz.y;
mixed sx2 = t23*dz.y-t22*dz.z;
mixed sy2 = t21*dz.z-t23*dz.x;
mixed sz2 = t22*dz.x-t21*dz.y;
mixed sx3 = t33*dz.y-t32*dz.z;
mixed sy3 = t31*dz.z-t33*dz.x;
mixed sz3 = t32*dz.x-t31*dz.y;
mixed3 wxScaled = wx*invNormXdir;
real3 f = loadForce(atoms.x, force);
real3 f = loadForce(siteAtomIndex, force);
mixed3 fp1 = localPosition*f.x;
mixed3 fp2 = localPosition*f.y;
mixed3 fp3 = localPosition*f.z;
real3 f1 = make_real3(0);
real3 f2 = make_real3(0);
real3 f3 = make_real3(0);
f1.x += fp1.x*wxScaled.x*(1-dx.x*dx.x) + fp1.z*(dz.x*sx1 ) + fp1.y*((-dx.x*dy.x )*wxScaled.x + dy.x*sx1 - dx.y*t12 - dx.z*t13) + f.x*originWeights.x;
f1.y += fp1.x*wxScaled.x*( -dx.x*dx.y) + fp1.z*(dz.x*sy1+t13) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.x + dy.x*sy1 + dx.y*t11);
f1.z += fp1.x*wxScaled.x*( -dx.x*dx.z) + fp1.z*(dz.x*sz1-t12) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.x + dy.x*sz1 + dx.z*t11);
f2.x += fp1.x*wxScaled.y*(1-dx.x*dx.x) + fp1.z*(dz.x*sx2 ) + fp1.y*((-dx.x*dy.x )*wxScaled.y + dy.x*sx2 - dx.y*t22 - dx.z*t23) + f.x*originWeights.y;
f2.y += fp1.x*wxScaled.y*( -dx.x*dx.y) + fp1.z*(dz.x*sy2+t23) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.y + dy.x*sy2 + dx.y*t21);
f2.z += fp1.x*wxScaled.y*( -dx.x*dx.z) + fp1.z*(dz.x*sz2-t22) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.y + dy.x*sz2 + dx.z*t21);
f3.x += fp1.x*wxScaled.z*(1-dx.x*dx.x) + fp1.z*(dz.x*sx3 ) + fp1.y*((-dx.x*dy.x )*wxScaled.z + dy.x*sx3 - dx.y*t32 - dx.z*t33) + f.x*originWeights.z;
f3.y += fp1.x*wxScaled.z*( -dx.x*dx.y) + fp1.z*(dz.x*sy3+t33) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.z + dy.x*sy3 + dx.y*t31);
f3.z += fp1.x*wxScaled.z*( -dx.x*dx.z) + fp1.z*(dz.x*sz3-t32) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.z + dy.x*sz3 + dx.z*t31);
f1.x += fp2.x*wxScaled.x*( -dx.y*dx.x) + fp2.z*(dz.y*sx1-t13) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.x - dy.y*sx1 - dx.x*t12);
f1.y += fp2.x*wxScaled.x*(1-dx.y*dx.y) + fp2.z*(dz.y*sy1 ) - fp2.y*(( dx.y*dy.y )*wxScaled.x - dy.y*sy1 + dx.x*t11 + dx.z*t13) + f.y*originWeights.x;
f1.z += fp2.x*wxScaled.x*( -dx.y*dx.z) + fp2.z*(dz.y*sz1+t11) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.x - dy.y*sz1 - dx.z*t12);
f2.x += fp2.x*wxScaled.y*( -dx.y*dx.x) + fp2.z*(dz.y*sx2-t23) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.y - dy.y*sx2 - dx.x*t22);
f2.y += fp2.x*wxScaled.y*(1-dx.y*dx.y) + fp2.z*(dz.y*sy2 ) - fp2.y*(( dx.y*dy.y )*wxScaled.y - dy.y*sy2 + dx.x*t21 + dx.z*t23) + f.y*originWeights.y;
f2.z += fp2.x*wxScaled.y*( -dx.y*dx.z) + fp2.z*(dz.y*sz2+t21) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.y - dy.y*sz2 - dx.z*t22);
f3.x += fp2.x*wxScaled.z*( -dx.y*dx.x) + fp2.z*(dz.y*sx3-t33) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.z - dy.y*sx3 - dx.x*t32);
f3.y += fp2.x*wxScaled.z*(1-dx.y*dx.y) + fp2.z*(dz.y*sy3 ) - fp2.y*(( dx.y*dy.y )*wxScaled.z - dy.y*sy3 + dx.x*t31 + dx.z*t33) + f.y*originWeights.z;
f3.z += fp2.x*wxScaled.z*( -dx.y*dx.z) + fp2.z*(dz.y*sz3+t31) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.z - dy.y*sz3 - dx.z*t32);
f1.x += fp3.x*wxScaled.x*( -dx.z*dx.x) + fp3.z*(dz.z*sx1+t12) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.x + dy.z*sx1 + dx.x*t13);
f1.y += fp3.x*wxScaled.x*( -dx.z*dx.y) + fp3.z*(dz.z*sy1-t11) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.x + dy.z*sy1 + dx.y*t13);
f1.z += fp3.x*wxScaled.x*(1-dx.z*dx.z) + fp3.z*(dz.z*sz1 ) + fp3.y*((-dx.z*dy.z )*wxScaled.x + dy.z*sz1 - dx.x*t11 - dx.y*t12) + f.z*originWeights.x;
f2.x += fp3.x*wxScaled.y*( -dx.z*dx.x) + fp3.z*(dz.z*sx2+t22) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.y + dy.z*sx2 + dx.x*t23);
f2.y += fp3.x*wxScaled.y*( -dx.z*dx.y) + fp3.z*(dz.z*sy2-t21) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.y + dy.z*sy2 + dx.y*t23);
f2.z += fp3.x*wxScaled.y*(1-dx.z*dx.z) + fp3.z*(dz.z*sz2 ) + fp3.y*((-dx.z*dy.z )*wxScaled.y + dy.z*sz2 - dx.x*t21 - dx.y*t22) + f.z*originWeights.y;
f3.x += fp3.x*wxScaled.z*( -dx.z*dx.x) + fp3.z*(dz.z*sx3+t32) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.z + dy.z*sx3 + dx.x*t33);
f3.y += fp3.x*wxScaled.z*( -dx.z*dx.y) + fp3.z*(dz.z*sy3-t31) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.z + dy.z*sy3 + dx.y*t33);
f3.z += fp3.x*wxScaled.z*(1-dx.z*dx.z) + fp3.z*(dz.z*sz3 ) + fp3.y*((-dx.z*dy.z )*wxScaled.z + dy.z*sz3 - dx.x*t31 - dx.y*t32) + f.z*originWeights.z;
addForce(atoms.y, force, f1);
addForce(atoms.z, force, f2);
addForce(atoms.w, force, f3);
for (int j = start; j < end; j++) {
real originWeight = localCoordsWeights[3*j];
real wx = localCoordsWeights[3*j+1];
real wy = localCoordsWeights[3*j+2];
mixed wxScaled = wx*invNormXdir;
mixed t1 = (wx*ydir.x-wy*xdir.x)*invNormZdir;
mixed t2 = (wx*ydir.y-wy*xdir.y)*invNormZdir;
mixed t3 = (wx*ydir.z-wy*xdir.z)*invNormZdir;
mixed sx = t3*dz.y-t2*dz.z;
mixed sy = t1*dz.z-t3*dz.x;
mixed sz = t2*dz.x-t1*dz.y;
real3 fresult = make_real3(0);
fresult.x += fp1.x*wxScaled*(1-dx.x*dx.x) + fp1.z*(dz.x*sx ) + fp1.y*((-dx.x*dy.x )*wxScaled + dy.x*sx - dx.y*t2 - dx.z*t3) + f.x*originWeight;
fresult.y += fp1.x*wxScaled*( -dx.x*dx.y) + fp1.z*(dz.x*sy+t3) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled + dy.x*sy + dx.y*t1);
fresult.z += fp1.x*wxScaled*( -dx.x*dx.z) + fp1.z*(dz.x*sz-t2) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled + dy.x*sz + dx.z*t1);
fresult.x += fp2.x*wxScaled*( -dx.y*dx.x) + fp2.z*(dz.y*sx-t3) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled - dy.y*sx - dx.x*t2);
fresult.y += fp2.x*wxScaled*(1-dx.y*dx.y) + fp2.z*(dz.y*sy ) - fp2.y*(( dx.y*dy.y )*wxScaled - dy.y*sy + dx.x*t1 + dx.z*t3) + f.y*originWeight;
fresult.z += fp2.x*wxScaled*( -dx.y*dx.z) + fp2.z*(dz.y*sz+t1) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled - dy.y*sz - dx.z*t2);
fresult.x += fp3.x*wxScaled*( -dx.z*dx.x) + fp3.z*(dz.z*sx+t2) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled + dy.z*sx + dx.x*t3);
fresult.y += fp3.x*wxScaled*( -dx.z*dx.y) + fp3.z*(dz.z*sy-t1) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled + dy.z*sy + dx.y*t3);
fresult.z += fp3.x*wxScaled*(1-dx.z*dx.z) + fp3.z*(dz.z*sz ) + fp3.y*((-dx.z*dy.z )*wxScaled + dy.z*sz - dx.x*t1 - dx.y*t2) + f.z*originWeight;
addForce(localCoordsAtoms[j], force, fresult);
}
}
}
......
......@@ -527,7 +527,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
}
else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
// An out of plane site.
// A local coordinates site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
int numParticles = site.getNumParticles();
......
......@@ -101,7 +101,7 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
xdir *= invNormXdir;
zdir *= invNormZdir;
ydir = cross(zdir, xdir);
mixed3 localPosition = localCoordsPos[index].xyz;
mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
mixed4 pos = loadPos(posq, posqCorrection, siteAtomIndex);
pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z;
pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z;
......@@ -247,7 +247,7 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
mixed3 dx = xdir*invNormXdir;
mixed3 dz = zdir*invNormZdir;
mixed3 dy = cross(dz, dx);
mixed3 localPosition = localCoordsPos[index].xyz;
mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
......@@ -266,7 +266,7 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
mixed sx = t3*dz.y-t2*dz.z;
mixed sy = t1*dz.z-t3*dz.x;
mixed sz = t2*dz.x-t1*dz.y;
mixed4 fresult = 0;
real4 fresult = 0;
fresult.x += fp1.x*wxScaled*(1-dx.x*dx.x) + fp1.z*(dz.x*sx ) + fp1.y*((-dx.x*dy.x )*wxScaled + dy.x*sx - dx.y*t2 - dx.z*t3) + f.x*originWeight;
fresult.y += fp1.x*wxScaled*( -dx.x*dx.y) + fp1.z*(dz.x*sy+t3) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled + dy.x*sy + dx.y*t1);
fresult.z += fp1.x*wxScaled*( -dx.x*dx.z) + fp1.z*(dz.x*sz-t2) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled + dy.x*sz + dx.z*t1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment