/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009 Stanford University and the Authors. * * Authors: Rossen P. Apostolov, Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ /* Define multiply operations for floats */ __device__ float2 MultofFloat2(float2 a, float2 b) { float2 c; c.x = a.x * b.x - a.y * b.y; c.y = a.x * b.y + a.y * b.x; return c; } __device__ float2 ConjMultofFloat2(float2 a, float2 b) { float2 c; c.x = a.x*b.x + a.y*b.y; c.y = a.y*b.x - a.x*b.y; return c; } __device__ float2 FloatMultFloat2(float r, float2 a) { float2 b; b.x = r*a.x; b.y = r*a.y; return b; } __device__ float2 FloatMultConjFloat2(float r, float2 a) { float2 b; b.x = r*a.y; b.y = r*a.x; return b; } __global__ void kCalculateEwaldFastEikr_kernel() { int kmax = cSim.kmax; float4 apos; unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x; while (atom < cSim.atoms) { //generic form of the array // pEikr[ atomID*kmax*3 + k*3 + m] for(unsigned int m = 0; (m < 3); m++) { // k = 0, explicitly cSim.pEikr[atom*kmax*3 + 0 + m].x = 1; cSim.pEikr[atom*kmax*3 + 0 + m].y = 0; // k = 1, explicitly cSim.pEikr[atom*kmax*3 + 3 + m].x = cos(apos.x*cSim.recipBoxSizeX); cSim.pEikr[atom*kmax*3 + 3 + m].y = sin(apos.x*cSim.recipBoxSizeX); cSim.pEikr[atom*kmax*3 + 4 + m].x = cos(apos.y*cSim.recipBoxSizeY); cSim.pEikr[atom*kmax*3 + 4 + m].y = sin(apos.y*cSim.recipBoxSizeY); cSim.pEikr[atom*kmax*3 + 5 + m].x = cos(apos.z*cSim.recipBoxSizeZ); cSim.pEikr[atom*kmax*3 + 5 + m].y = sin(apos.z*cSim.recipBoxSizeZ); } // k > 1, by recursion for(unsigned int k=2; (k= 0) { // tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1); tab_xy = MultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 + ry*3 + 1]); } else { // tab_xy[n] = EIR(rx, n, 0) * conj (EIR(-ry, n, 1)); tab_xy = ConjMultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 - ry*3 + 1]); } for (int rz = lowrz; rz < numRz; rz++) { // next one is scary! index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 ); if( rz >= 0) { //tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * EIR(rz, n, 2)); cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( apos.w , MultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 + rz*3 + 2] )); } else { // tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * conj(EIR(-rz, n, 2))); cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( apos.w , ConjMultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 - rz*3 + 2] )); } cSim.pCosSinSum[index].x += cSim.pStructureFactor[atom*totalK + index].x; cSim.pCosSinSum[index].y += cSim.pStructureFactor[atom*totalK + index].y; lowrz = 1 - numRz; } lowry = 1 - numRy; } } // ********************************************************************** atom += blockDim.x * gridDim.x; } } __global__ void kCalculateEwaldFastForces_kernel() { float PI = 3.14159265358979323846f; // hard-coded maximum k-vectors, no interface yet // int kmax = cSim.kmax; const float epsilon = 1.0; float recipCoeff = (4*PI/cSim.V/epsilon); // float2 eikr; // float4 apos; int lowry = 0; int lowrz = 1; int numRx = 20+1; int numRy = 20+1; int numRz = 20+1; unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1); int index; unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x; while (atom < cSim.atoms) { // apos = cSim.pPosq[atom]; // ********************************************************************** // cSim.pEikr[atom*kmax*3 + k*3 + m] for(int rx = 0; rx < numRx; rx++) { float kx = rx * cSim.recipBoxSizeX; for(int ry = lowry; ry < numRy; ry++) { float ky = ry * cSim.recipBoxSizeY; for (int rz = lowrz; rz < numRz; rz++) { float kz = rz * cSim.recipBoxSizeZ; // next one is scary! index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 ); float k2 = kx * kx + ky * ky + kz * kz; float ak = exp(k2*cSim.factorEwald) / k2; float dEdR = ak * (cSim.pCosSinSum[index].x * cSim.pStructureFactor[atom*totalK + index].y - cSim.pCosSinSum[index].y * cSim.pStructureFactor[atom*totalK + index].x); cSim.pForce4[atom].x += 2 * recipCoeff * dEdR * kx ; cSim.pForce4[atom].y += 2 * recipCoeff * dEdR * ky ; cSim.pForce4[atom].z += 2 * recipCoeff * dEdR * kz ; lowrz = 1 - numRz; } lowry = 1 - numRy; } } // ********************************************************************** atom += blockDim.x * gridDim.x; } }