Commit 80c223a8 authored by peastman's avatar peastman
Browse files

OpenCL implementation of LocalCoordinatesSite depending on arbitrary particles

parent 26809372
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2014 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -156,8 +156,11 @@ private:
OpenCLArray* vsite3AvgWeights;
OpenCLArray* vsiteOutOfPlaneAtoms;
OpenCLArray* vsiteOutOfPlaneWeights;
OpenCLArray* vsiteLocalCoordsIndex;
OpenCLArray* vsiteLocalCoordsAtoms;
OpenCLArray* vsiteLocalCoordsParams;
OpenCLArray* vsiteLocalCoordsWeights;
OpenCLArray* vsiteLocalCoordsPos;
OpenCLArray* vsiteLocalCoordsStartIndex;
int randomPos;
int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedHostBuffer(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsAtoms(NULL), vsiteLocalCoordsParams(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsIndex(NULL), vsiteLocalCoordsAtoms(NULL),
vsiteLocalCoordsWeights(NULL), vsiteLocalCoordsPos(NULL), vsiteLocalCoordsStartIndex(NULL),
hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) {
// Create workspace arrays.
......@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vector<mm_double4> vsite3AvgWeightVec;
vector<mm_int4> vsiteOutOfPlaneAtomVec;
vector<mm_double4> vsiteOutOfPlaneWeightVec;
vector<mm_int4> vsiteLocalCoordsAtomVec;
vector<cl_double> vsiteLocalCoordsParamVec;
vector<cl_int> vsiteLocalCoordsIndexVec;
vector<cl_int> vsiteLocalCoordsAtomVec;
vector<cl_int> vsiteLocalCoordsStartVec;
vector<cl_double> vsiteLocalCoordsWeightVec;
vector<mm_double4> vsiteLocalCoordsPosVec;
for (int i = 0; i < numAtoms; i++) {
if (system.isVirtualSite(i)) {
if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
......@@ -526,62 +530,70 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// An out of plane site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
vsiteLocalCoordsAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
Vec3 origin = site.getOriginWeights();
Vec3 x = site.getXWeights();
Vec3 y = site.getYWeights();
int numParticles = site.getNumParticles();
vector<double> origin, x, y;
site.getOriginWeights(origin);
site.getXWeights(x);
site.getYWeights(y);
vsiteLocalCoordsIndexVec.push_back(i);
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
for (int j = 0; j < numParticles; j++) {
vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
vsiteLocalCoordsWeightVec.push_back(origin[j]);
vsiteLocalCoordsWeightVec.push_back(x[j]);
vsiteLocalCoordsWeightVec.push_back(y[j]);
}
Vec3 pos = site.getLocalPosition();
vsiteLocalCoordsParamVec.push_back(origin[0]);
vsiteLocalCoordsParamVec.push_back(origin[1]);
vsiteLocalCoordsParamVec.push_back(origin[2]);
vsiteLocalCoordsParamVec.push_back(x[0]);
vsiteLocalCoordsParamVec.push_back(x[1]);
vsiteLocalCoordsParamVec.push_back(x[2]);
vsiteLocalCoordsParamVec.push_back(y[0]);
vsiteLocalCoordsParamVec.push_back(y[1]);
vsiteLocalCoordsParamVec.push_back(y[2]);
vsiteLocalCoordsParamVec.push_back(pos[0]);
vsiteLocalCoordsParamVec.push_back(pos[1]);
vsiteLocalCoordsParamVec.push_back(pos[2]);
vsiteLocalCoordsPosVec.push_back(mm_double4(pos[0], pos[1], pos[2], 0.0));
}
}
}
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsAtomVec.size();
int numLocalCoords = vsiteLocalCoordsPosVec.size();
numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsAtoms = OpenCLArray::create<mm_int4>(context, max(1, numLocalCoords), "vsiteLocalCoordinatesAtoms");
vsiteLocalCoordsIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0)
vsite2AvgAtoms->upload(vsite2AvgAtomVec);
if (num3Avg > 0)
vsite3AvgAtoms->upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms->upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0)
if (numLocalCoords > 0) {
vsiteLocalCoordsIndex->upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms->upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex->upload(vsiteLocalCoordsStartVec);
}
if (context.getUseDoublePrecision()) {
vsite2AvgWeights = OpenCLArray::create<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = OpenCLArray::create<cl_double>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams");
vsiteLocalCoordsWeights = OpenCLArray::create<cl_double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0)
vsite2AvgWeights->upload(vsite2AvgWeightVec);
if (num3Avg > 0)
vsite3AvgWeights->upload(vsite3AvgWeightVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights->upload(vsiteOutOfPlaneWeightVec);
if (numLocalCoords > 0)
vsiteLocalCoordsParams->upload(vsiteLocalCoordsParamVec);
if (numLocalCoords > 0) {
vsiteLocalCoordsWeights->upload(vsiteLocalCoordsWeightVec);
vsiteLocalCoordsPos->upload(vsiteLocalCoordsPosVec);
}
}
else {
vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = OpenCLArray::create<float>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams");
vsiteLocalCoordsWeights = OpenCLArray::create<cl_float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) {
vector<mm_float2> floatWeights(num2Avg);
for (int i = 0; i < num2Avg; i++)
......@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeights->upload(floatWeights);
}
if (numLocalCoords > 0) {
vector<cl_float> floatParams(vsiteLocalCoordsParamVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsParamVec.size(); i++)
floatParams[i] = (cl_float) vsiteLocalCoordsParamVec[i];
vsiteLocalCoordsParams->upload(floatParams);
vector<cl_float> floatWeights(vsiteLocalCoordsWeightVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
floatWeights[i] = (cl_float) vsiteLocalCoordsWeightVec[i];
vsiteLocalCoordsWeights->upload(floatWeights);
vector<mm_float4> floatPos(vsiteLocalCoordsPosVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
floatPos[i] = mm_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
vsiteLocalCoordsPos->upload(floatPos);
}
}
......@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsParams->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer());
vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces");
index = 0;
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
......@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsParams->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer());
if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics())
vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces");
}
......@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete vsiteOutOfPlaneAtoms;
if (vsiteOutOfPlaneWeights != NULL)
delete vsiteOutOfPlaneWeights;
if (vsiteLocalCoordsIndex != NULL)
delete vsiteLocalCoordsIndex;
if (vsiteLocalCoordsAtoms != NULL)
delete vsiteLocalCoordsAtoms;
if (vsiteLocalCoordsParams != NULL)
delete vsiteLocalCoordsParams;
if (vsiteLocalCoordsWeights != NULL)
delete vsiteLocalCoordsWeights;
if (vsiteLocalCoordsPos != NULL)
delete vsiteLocalCoordsPos;
if (vsiteLocalCoordsStartIndex != NULL)
delete vsiteLocalCoordsStartIndex;
}
void OpenCLIntegrationUtilities::setNextStepSize(double size) {
......
......@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
__global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
__global const int4* restrict localCoordsAtoms, __global const real* restrict localCoordsParams) {
__global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
__global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
__global const int* restrict localCoordsStartIndex) {
#ifndef USE_MIXED_PRECISION
__global real4* posqCorrection = 0;
#endif
......@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
// Local coordinates sites.
for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4 atoms = localCoordsAtoms[index];
__global const real* params = &localCoordsParams[12*index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w);
mixed4 pos1 = (mixed4) (pos1_4.x, pos1_4.y, pos1_4.z, 0);
mixed4 pos2 = (mixed4) (pos2_4.x, pos2_4.y, pos2_4.z, 0);
mixed4 pos3 = (mixed4) (pos3_4.x, pos3_4.y, pos3_4.z, 0);
mixed4 originWeights = (mixed4) (params[0], params[1], params[2], 0);
mixed4 xWeights = (mixed4) (params[3], params[4], params[5], 0);
mixed4 yWeights = (mixed4) (params[6], params[7], params[8], 0);
mixed4 localPosition = (mixed4) (params[9], params[10], params[11], 0);
mixed4 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z;
mixed4 xdir = pos1*xWeights.x + pos2*xWeights.y + pos3*xWeights.z;
mixed4 ydir = pos1*yWeights.x + pos2*yWeights.y + pos3*yWeights.z;
mixed4 zdir = cross(xdir, ydir);
xdir *= rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
zdir *= rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
int siteAtomIndex = localCoordsIndex[index];
int start = localCoordsStartIndex[index];
int end = localCoordsStartIndex[index+1];
mixed3 origin = 0, xdir = 0, ydir = 0;
for (int j = start; j < end; j++) {
mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
origin += pos*localCoordsWeights[3*j];
xdir += pos*localCoordsWeights[3*j+1];
ydir += pos*localCoordsWeights[3*j+2];
}
mixed3 zdir = cross(xdir, ydir);
mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
xdir *= invNormXdir;
zdir *= invNormZdir;
ydir = cross(zdir, xdir);
mixed3 localPosition = localCoordsPos[index].xyz;
mixed4 pos = loadPos(posq, posqCorrection, siteAtomIndex);
pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z;
pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z;
pos.z = origin.z + xdir.z*localPosition.x + ydir.z*localPosition.y + zdir.z*localPosition.z;
storePos(posq, posqCorrection, atoms.x, pos);
storePos(posq, posqCorrection, siteAtomIndex, pos);
}
}
......@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
__global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
__global const int4* restrict localCoordsAtoms, __global const real* restrict localCoordsParams) {
__global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
__global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
__global const int* restrict localCoordsStartIndex) {
#ifndef USE_MIXED_PRECISION
__global real4* posqCorrection = 0;
#endif
......@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
// Local coordinates sites.
for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4 atoms = localCoordsAtoms[index];
__global const real* params = &localCoordsParams[12*index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w);
mixed4 pos1 = (mixed4) (pos1_4.x, pos1_4.y, pos1_4.z, 0);
mixed4 pos2 = (mixed4) (pos2_4.x, pos2_4.y, pos2_4.z, 0);
mixed4 pos3 = (mixed4) (pos3_4.x, pos3_4.y, pos3_4.z, 0);
mixed4 originWeights = (mixed4) (params[0], params[1], params[2], 0);
mixed4 wx = (mixed4) (params[3], params[4], params[5], 0);
mixed4 wy = (mixed4) (params[6], params[7], params[8], 0);
mixed4 localPosition = (mixed4) (params[9], params[10], params[11], 0);
mixed4 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z;
mixed4 xdir = pos1*wx.x + pos2*wx.y + pos3*wx.z;
mixed4 ydir = pos1*wy.x + pos2*wy.y + pos3*wy.z;
mixed4 zdir = cross(xdir, ydir);
mixed invNormXdir = rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed invNormZdir = rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed4 dx = xdir*invNormXdir;
mixed4 dz = zdir*invNormZdir;
mixed4 dy = cross(dz, dx);
int siteAtomIndex = localCoordsIndex[index];
int start = localCoordsStartIndex[index];
int end = localCoordsStartIndex[index+1];
mixed3 origin = 0, xdir = 0, ydir = 0;
for (int j = start; j < end; j++) {
mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
origin += pos*localCoordsWeights[3*j];
xdir += pos*localCoordsWeights[3*j+1];
ydir += pos*localCoordsWeights[3*j+2];
}
mixed3 zdir = cross(xdir, ydir);
mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
mixed3 dx = xdir*invNormXdir;
mixed3 dz = zdir*invNormZdir;
mixed3 dy = cross(dz, dx);
mixed3 localPosition = localCoordsPos[index].xyz;
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
mixed t11 = (wx.x*ydir.x-wy.x*xdir.x)*invNormZdir;
mixed t12 = (wx.x*ydir.y-wy.x*xdir.y)*invNormZdir;
mixed t13 = (wx.x*ydir.z-wy.x*xdir.z)*invNormZdir;
mixed t21 = (wx.y*ydir.x-wy.y*xdir.x)*invNormZdir;
mixed t22 = (wx.y*ydir.y-wy.y*xdir.y)*invNormZdir;
mixed t23 = (wx.y*ydir.z-wy.y*xdir.z)*invNormZdir;
mixed t31 = (wx.z*ydir.x-wy.z*xdir.x)*invNormZdir;
mixed t32 = (wx.z*ydir.y-wy.z*xdir.y)*invNormZdir;
mixed t33 = (wx.z*ydir.z-wy.z*xdir.z)*invNormZdir;
mixed sx1 = t13*dz.y-t12*dz.z;
mixed sy1 = t11*dz.z-t13*dz.x;
mixed sz1 = t12*dz.x-t11*dz.y;
mixed sx2 = t23*dz.y-t22*dz.z;
mixed sy2 = t21*dz.z-t23*dz.x;
mixed sz2 = t22*dz.x-t21*dz.y;
mixed sx3 = t33*dz.y-t32*dz.z;
mixed sy3 = t31*dz.z-t33*dz.x;
mixed sz3 = t32*dz.x-t31*dz.y;
mixed4 wxScaled = wx*invNormXdir;
real4 f = force[atoms.x];
real4 f1 = 0;
real4 f2 = 0;
real4 f3 = 0;
mixed4 fp1 = localPosition*f.x;
mixed4 fp2 = localPosition*f.y;
mixed4 fp3 = localPosition*f.z;
f1.x += fp1.x*wxScaled.x*(1-dx.x*dx.x) + fp1.z*(dz.x*sx1 ) + fp1.y*((-dx.x*dy.x )*wxScaled.x + dy.x*sx1 - dx.y*t12 - dx.z*t13) + f.x*originWeights.x;
f1.y += fp1.x*wxScaled.x*( -dx.x*dx.y) + fp1.z*(dz.x*sy1+t13) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.x + dy.x*sy1 + dx.y*t11);
f1.z += fp1.x*wxScaled.x*( -dx.x*dx.z) + fp1.z*(dz.x*sz1-t12) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.x + dy.x*sz1 + dx.z*t11);
f2.x += fp1.x*wxScaled.y*(1-dx.x*dx.x) + fp1.z*(dz.x*sx2 ) + fp1.y*((-dx.x*dy.x )*wxScaled.y + dy.x*sx2 - dx.y*t22 - dx.z*t23) + f.x*originWeights.y;
f2.y += fp1.x*wxScaled.y*( -dx.x*dx.y) + fp1.z*(dz.x*sy2+t23) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.y + dy.x*sy2 + dx.y*t21);
f2.z += fp1.x*wxScaled.y*( -dx.x*dx.z) + fp1.z*(dz.x*sz2-t22) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.y + dy.x*sz2 + dx.z*t21);
f3.x += fp1.x*wxScaled.z*(1-dx.x*dx.x) + fp1.z*(dz.x*sx3 ) + fp1.y*((-dx.x*dy.x )*wxScaled.z + dy.x*sx3 - dx.y*t32 - dx.z*t33) + f.x*originWeights.z;
f3.y += fp1.x*wxScaled.z*( -dx.x*dx.y) + fp1.z*(dz.x*sy3+t33) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.z + dy.x*sy3 + dx.y*t31);
f3.z += fp1.x*wxScaled.z*( -dx.x*dx.z) + fp1.z*(dz.x*sz3-t32) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.z + dy.x*sz3 + dx.z*t31);
f1.x += fp2.x*wxScaled.x*( -dx.y*dx.x) + fp2.z*(dz.y*sx1-t13) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.x - dy.y*sx1 - dx.x*t12);
f1.y += fp2.x*wxScaled.x*(1-dx.y*dx.y) + fp2.z*(dz.y*sy1 ) - fp2.y*(( dx.y*dy.y )*wxScaled.x - dy.y*sy1 + dx.x*t11 + dx.z*t13) + f.y*originWeights.x;
f1.z += fp2.x*wxScaled.x*( -dx.y*dx.z) + fp2.z*(dz.y*sz1+t11) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.x - dy.y*sz1 - dx.z*t12);
f2.x += fp2.x*wxScaled.y*( -dx.y*dx.x) + fp2.z*(dz.y*sx2-t23) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.y - dy.y*sx2 - dx.x*t22);
f2.y += fp2.x*wxScaled.y*(1-dx.y*dx.y) + fp2.z*(dz.y*sy2 ) - fp2.y*(( dx.y*dy.y )*wxScaled.y - dy.y*sy2 + dx.x*t21 + dx.z*t23) + f.y*originWeights.y;
f2.z += fp2.x*wxScaled.y*( -dx.y*dx.z) + fp2.z*(dz.y*sz2+t21) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.y - dy.y*sz2 - dx.z*t22);
f3.x += fp2.x*wxScaled.z*( -dx.y*dx.x) + fp2.z*(dz.y*sx3-t33) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.z - dy.y*sx3 - dx.x*t32);
f3.y += fp2.x*wxScaled.z*(1-dx.y*dx.y) + fp2.z*(dz.y*sy3 ) - fp2.y*(( dx.y*dy.y )*wxScaled.z - dy.y*sy3 + dx.x*t31 + dx.z*t33) + f.y*originWeights.z;
f3.z += fp2.x*wxScaled.z*( -dx.y*dx.z) + fp2.z*(dz.y*sz3+t31) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.z - dy.y*sz3 - dx.z*t32);
f1.x += fp3.x*wxScaled.x*( -dx.z*dx.x) + fp3.z*(dz.z*sx1+t12) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.x + dy.z*sx1 + dx.x*t13);
f1.y += fp3.x*wxScaled.x*( -dx.z*dx.y) + fp3.z*(dz.z*sy1-t11) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.x + dy.z*sy1 + dx.y*t13);
f1.z += fp3.x*wxScaled.x*(1-dx.z*dx.z) + fp3.z*(dz.z*sz1 ) + fp3.y*((-dx.z*dy.z )*wxScaled.x + dy.z*sz1 - dx.x*t11 - dx.y*t12) + f.z*originWeights.x;
f2.x += fp3.x*wxScaled.y*( -dx.z*dx.x) + fp3.z*(dz.z*sx2+t22) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.y + dy.z*sx2 + dx.x*t23);
f2.y += fp3.x*wxScaled.y*( -dx.z*dx.y) + fp3.z*(dz.z*sy2-t21) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.y + dy.z*sy2 + dx.y*t23);
f2.z += fp3.x*wxScaled.y*(1-dx.z*dx.z) + fp3.z*(dz.z*sz2 ) + fp3.y*((-dx.z*dy.z )*wxScaled.y + dy.z*sz2 - dx.x*t21 - dx.y*t22) + f.z*originWeights.y;
f3.x += fp3.x*wxScaled.z*( -dx.z*dx.x) + fp3.z*(dz.z*sx3+t32) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.z + dy.z*sx3 + dx.x*t33);
f3.y += fp3.x*wxScaled.z*( -dx.z*dx.y) + fp3.z*(dz.z*sy3-t31) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.z + dy.z*sy3 + dx.y*t33);
f3.z += fp3.x*wxScaled.z*(1-dx.z*dx.z) + fp3.z*(dz.z*sz3 ) + fp3.y*((-dx.z*dy.z )*wxScaled.z + dy.z*sz3 - dx.x*t31 - dx.y*t32) + f.z*originWeights.z;
ADD_FORCE(atoms.y, f1);
ADD_FORCE(atoms.z, f2);
ADD_FORCE(atoms.w, f3);
real4 f = force[siteAtomIndex];
mixed3 fp1 = localPosition*f.x;
mixed3 fp2 = localPosition*f.y;
mixed3 fp3 = localPosition*f.z;
for (int j = start; j < end; j++) {
real originWeight = localCoordsWeights[3*j];
real wx = localCoordsWeights[3*j+1];
real wy = localCoordsWeights[3*j+2];
mixed wxScaled = wx*invNormXdir;
mixed t1 = (wx*ydir.x-wy*xdir.x)*invNormZdir;
mixed t2 = (wx*ydir.y-wy*xdir.y)*invNormZdir;
mixed t3 = (wx*ydir.z-wy*xdir.z)*invNormZdir;
mixed sx = t3*dz.y-t2*dz.z;
mixed sy = t1*dz.z-t3*dz.x;
mixed sz = t2*dz.x-t1*dz.y;
mixed4 fresult = 0;
fresult.x += fp1.x*wxScaled*(1-dx.x*dx.x) + fp1.z*(dz.x*sx ) + fp1.y*((-dx.x*dy.x )*wxScaled + dy.x*sx - dx.y*t2 - dx.z*t3) + f.x*originWeight;
fresult.y += fp1.x*wxScaled*( -dx.x*dx.y) + fp1.z*(dz.x*sy+t3) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled + dy.x*sy + dx.y*t1);
fresult.z += fp1.x*wxScaled*( -dx.x*dx.z) + fp1.z*(dz.x*sz-t2) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled + dy.x*sz + dx.z*t1);
fresult.x += fp2.x*wxScaled*( -dx.y*dx.x) + fp2.z*(dz.y*sx-t3) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled - dy.y*sx - dx.x*t2);
fresult.y += fp2.x*wxScaled*(1-dx.y*dx.y) + fp2.z*(dz.y*sy ) - fp2.y*(( dx.y*dy.y )*wxScaled - dy.y*sy + dx.x*t1 + dx.z*t3) + f.y*originWeight;
fresult.z += fp2.x*wxScaled*( -dx.y*dx.z) + fp2.z*(dz.y*sz+t1) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled - dy.y*sz - dx.z*t2);
fresult.x += fp3.x*wxScaled*( -dx.z*dx.x) + fp3.z*(dz.z*sx+t2) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled + dy.z*sx + dx.x*t3);
fresult.y += fp3.x*wxScaled*( -dx.z*dx.y) + fp3.z*(dz.z*sy-t1) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled + dy.z*sy + dx.y*t3);
fresult.z += fp3.x*wxScaled*(1-dx.z*dx.z) + fp3.z*(dz.z*sz ) + fp3.y*((-dx.z*dy.z )*wxScaled + dy.z*sz - dx.x*t1 - dx.y*t2) + f.z*originWeight;
ADD_FORCE(localCoordsAtoms[j], fresult);
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment