Commit 59bd8d19 authored by peastman
Browse files

Merge pull request #1165 from peastman/mixedenergy

Further improved energy accuracy in mixed precision mode
parents a20944f6 1f2b65da
...@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
/** /**
* Compute forces on donors. * Compute forces on donors.
*/ */
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions, __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0; mixed energy = 0;
real4 f1 = (real4) 0; real4 f1 = (real4) 0;
real4 f2 = (real4) 0; real4 f2 = (real4) 0;
real4 f3 = (real4) 0; real4 f3 = (real4) 0;
...@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
/** /**
* Compute forces on acceptors. * Compute forces on acceptors.
*/ */
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions, __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
......
...@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e ...@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
* Compute the interaction. * Compute the interaction.
*/ */
__kernel void computeInteraction( __kernel void computeInteraction(
__global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global long* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict neighbors, __global const int* restrict neighborStartIndex , __global const int* restrict neighbors, __global const int* restrict neighborStartIndex
...@@ -84,7 +84,7 @@ __kernel void computeInteraction( ...@@ -84,7 +84,7 @@ __kernel void computeInteraction(
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex , __global int* restrict exclusions, __global int* restrict exclusionStartIndex
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0.0f; mixed energy = 0;
// Loop over particles to be the first one in the set. // Loop over particles to be the first one in the set.
......
...@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups( ...@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
#else #else
__global real4* restrict forceBuffers, __global real4* restrict forceBuffers,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE; const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
real energy = 0.0f; mixed energy = 0;
__local AtomData localData[LOCAL_MEMORY_SIZE]; __local AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
......
...@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) { ...@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term. * Precompute the cosine and sine sums which appear in each force term.
*/ */
__kernel void calculateEwaldCosSinSums(__global real* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) { __kernel void calculateEwaldCosSinSums(__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
const unsigned int ksizex = 2*KMAX_X-1; const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1; const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1; const unsigned int ksizez = 2*KMAX_Z-1;
const unsigned int totalK = ksizex*ksizey*ksizez; const unsigned int totalK = ksizex*ksizey*ksizez;
unsigned int index = get_global_id(0); unsigned int index = get_global_id(0);
real energy = 0.0f; mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z) while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += get_global_size(0); index += get_global_size(0);
while (index < totalK) { while (index < totalK) {
......
...@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1( ...@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
#else #else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce, __global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1( ...@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
const unsigned int warp = get_global_id(0)/TILE_SIZE; const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx; const unsigned int tbx = get_local_id(0) - tgx;
real energy = 0.0f; mixed energy = 0;
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE]; __local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
...@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor ...@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
__global const long* restrict bornForceIn, __global const long* restrict bornForceIn,
#endif #endif
__global real* restrict energyBuffer, __global const float2* restrict params, __global const real* restrict bornRadii, __global const real* restrict obcChain) { __global mixed* restrict energyBuffer, __global const float2* restrict params, __global const real* restrict bornRadii, __global const real* restrict obcChain) {
real energy = 0.0f; mixed energy = 0;
unsigned int index = get_global_id(0); unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
// Sum the Born force // Sum the Born force
......
...@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1( ...@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
#else #else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce, __global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1( ...@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
unsigned int numTiles, unsigned int numTiles,
#endif #endif
__global const ushort2* exclusionTiles) { __global const ushort2* exclusionTiles) {
real energy = 0.0f; mixed energy = 0;
__local AtomData2 localData[TILE_SIZE]; __local AtomData2 localData[TILE_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
...@@ -22,7 +22,7 @@ __kernel void computeNonbonded( ...@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
#else #else
__global real4* restrict forceBuffers, __global real4* restrict forceBuffers,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
...@@ -429,6 +429,6 @@ __kernel void computeNonbonded( ...@@ -429,6 +429,6 @@ __kernel void computeNonbonded(
pos++; pos++;
} }
#ifdef INCLUDE_ENERGY #ifdef INCLUDE_ENERGY
energyBuffer[get_global_id(0)] += (real) energy; energyBuffer[get_global_id(0)] += energy;
#endif #endif
} }
...@@ -19,7 +19,7 @@ __kernel void computeNonbonded( ...@@ -19,7 +19,7 @@ __kernel void computeNonbonded(
#else #else
__global real4* restrict forceBuffers, __global real4* restrict forceBuffers,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
...@@ -27,7 +27,7 @@ __kernel void computeNonbonded( ...@@ -27,7 +27,7 @@ __kernel void computeNonbonded(
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0; mixed energy = 0;
__local AtomData localData[TILE_SIZE]; __local AtomData localData[TILE_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
...@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c ...@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
} }
} }
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global real* restrict energyBuffer, __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixed* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, __global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half // R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z; const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real energy = 0; mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices // real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z)); int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
......
...@@ -23,8 +23,8 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum, ...@@ -23,8 +23,8 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum,
/** /**
* Apply the surface area term to the force and energy. * Apply the surface area term to the force and energy.
*/ */
extern "C" __global__ void computeSurfaceAreaForce(long long* __restrict__ bornForce, real* __restrict__ energyBuffer, const float2* __restrict__ params, const real* __restrict__ bornRadii) { extern "C" __global__ void computeSurfaceAreaForce(long long* __restrict__ bornForce, mixed* __restrict__ energyBuffer, const float2* __restrict__ params, const real* __restrict__ bornRadii) {
real energy = 0; mixed energy = 0;
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real bornRadius = bornRadii[index]; real bornRadius = bornRadii[index];
float radius = params[index].x; float radius = params[index].x;
...@@ -216,7 +216,7 @@ inline __device__ void zeroAtomData(AtomData2& data) { ...@@ -216,7 +216,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
* Compute electrostatic interactions. * Compute electrostatic interactions.
*/ */
extern "C" __global__ void computeGKForces( extern "C" __global__ void computeGKForces(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole, const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) { const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
...@@ -225,7 +225,7 @@ extern "C" __global__ void computeGKForces( ...@@ -225,7 +225,7 @@ extern "C" __global__ void computeGKForces(
const unsigned int numTiles = numTileIndices; const unsigned int numTiles = numTileIndices;
unsigned int pos = (unsigned int) (startTileIndex+warp*(long long)numTiles/totalWarps); unsigned int pos = (unsigned int) (startTileIndex+warp*(long long)numTiles/totalWarps);
unsigned int end = (unsigned int) (startTileIndex+(warp+1)*(long long)numTiles/totalWarps); unsigned int end = (unsigned int) (startTileIndex+(warp+1)*(long long)numTiles/totalWarps);
real energy = 0; mixed energy = 0;
__shared__ AtomData2 localData[GK_FORCE_THREAD_BLOCK_SIZE]; __shared__ AtomData2 localData[GK_FORCE_THREAD_BLOCK_SIZE];
do { do {
...@@ -605,7 +605,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -605,7 +605,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
* Compute electrostatic interactions. * Compute electrostatic interactions.
*/ */
extern "C" __global__ void computeEDiffForce( extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
...@@ -615,7 +615,7 @@ extern "C" __global__ void computeEDiffForce( ...@@ -615,7 +615,7 @@ extern "C" __global__ void computeEDiffForce(
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
real energy = 0; mixed energy = 0;
__shared__ AtomData4 localData[EDIFF_THREAD_BLOCK_SIZE]; __shared__ AtomData4 localData[EDIFF_THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
...@@ -191,14 +191,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real rmi ...@@ -191,14 +191,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real rmi
/** /**
* Compute WCA interaction. * Compute WCA interaction.
*/ */
extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const float2* __restrict__ radiusEpsilon) { const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const float2* __restrict__ radiusEpsilon) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices; const unsigned int numTiles = numTileIndices;
unsigned int pos = (unsigned int) (startTileIndex+warp*(long long)numTiles/totalWarps); unsigned int pos = (unsigned int) (startTileIndex+warp*(long long)numTiles/totalWarps);
unsigned int end = (unsigned int) (startTileIndex+(warp+1)*(long long)numTiles/totalWarps); unsigned int end = (unsigned int) (startTileIndex+(warp+1)*(long long)numTiles/totalWarps);
real energy = 0; mixed energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
do { do {
......
...@@ -54,7 +54,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -54,7 +54,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f)); return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
} }
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor, real& energy) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor, mixed& energy) {
// Compute the displacement. // Compute the displacement.
real3 delta; real3 delta;
...@@ -374,7 +374,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -374,7 +374,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
* Compute electrostatic interactions. * Compute electrostatic interactions.
*/ */
extern "C" __global__ void computeElectrostatics( extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -388,7 +388,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -388,7 +388,7 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
real energy = 0; mixed energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
...@@ -874,14 +874,14 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri ...@@ -874,14 +874,14 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
} }
extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole, long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
real multipole[10]; real multipole[10];
const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19}; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16}; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18}; const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
real energy = 0; mixed energy = 0;
__shared__ real fracToCart[3][3]; __shared__ real fracToCart[3][3];
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x; fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
...@@ -956,7 +956,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -956,7 +956,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
} }
extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole, long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ inducedDipole_global, const real* __restrict__ inducedDipolePolar_global, const real* __restrict__ inducedDipole_global, const real* __restrict__ inducedDipolePolar_global,
const real* __restrict__ phi, const real* __restrict__ phid, const real* __restrict__ phip, const real* __restrict__ phi, const real* __restrict__ phid, const real* __restrict__ phip,
...@@ -967,7 +967,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -967,7 +967,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19}; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16}; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18}; const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
real energy = 0; mixed energy = 0;
__shared__ real fracToCart[3][3]; __shared__ real fracToCart[3][3];
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x; fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
......
...@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
} }
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor, __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor,
real& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) { mixed& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
// Compute the displacement. // Compute the displacement.
real3 delta; real3 delta;
...@@ -411,7 +411,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -411,7 +411,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
/** /**
* Compute the self energy and self torque. * Compute the self energy and self torque.
*/ */
__device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) { __device__ void computeSelfEnergyAndTorque(AtomData& atom1, mixed& energy) {
real cii = atom1.q*atom1.q; real cii = atom1.q*atom1.q;
real3 dipole = make_real3(atom1.sphericalDipole.y, atom1.sphericalDipole.z, atom1.sphericalDipole.x); real3 dipole = make_real3(atom1.sphericalDipole.y, atom1.sphericalDipole.z, atom1.sphericalDipole.x);
real dii = dot(dipole, dipole+atom1.inducedDipole); real dii = dot(dipole, dipole+atom1.inducedDipole);
...@@ -439,7 +439,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) { ...@@ -439,7 +439,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
* Compute electrostatic interactions. * Compute electrostatic interactions.
*/ */
extern "C" __global__ void computeElectrostatics( extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -453,7 +453,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -453,7 +453,7 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
real energy = 0; mixed energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment