Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59bd8d19
Commit
59bd8d19
authored
Oct 02, 2015
by
peastman
Browse files
Merge pull request #1165 from peastman/mixedenergy
Further improved energy accuracy in mixed precision mode
parents
a20944f6
1f2b65da
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
40 additions
and
40 deletions
+40
-40
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+3
-3
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+2
-2
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+2
-2
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+2
-2
platforms/opencl/src/kernels/gbsaObcReductions.cl
platforms/opencl/src/kernels/gbsaObcReductions.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+2
-2
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+2
-2
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+2
-2
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaGk.cu
plugins/amoeba/platforms/cuda/src/kernels/amoebaGk.cu
+6
-6
plugins/amoeba/platforms/cuda/src/kernels/amoebaWcaForce.cu
plugins/amoeba/platforms/cuda/src/kernels/amoebaWcaForce.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+4
-4
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+4
-4
No files found.
platforms/opencl/src/kernels/customHbondForce.cl
View file @
59bd8d19
...
@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
/**
/**
* Compute forces on donors.
* Compute forces on donors.
*/
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real
energy = 0;
mixed
energy = 0;
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f2 = (real4) 0;
real4 f3 = (real4) 0;
real4 f3 = (real4) 0;
...
@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
/**
/**
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
59bd8d19
...
@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
...
@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*
Compute
the
interaction.
*
Compute
the
interaction.
*/
*/
__kernel
void
computeInteraction
(
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
long*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
...
@@ -84,7 +84,7 @@ __kernel void computeInteraction(
...
@@ -84,7 +84,7 @@ __kernel void computeInteraction(
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
.0f
;
mixed
energy
=
0
;
//
Loop
over
particles
to
be
the
first
one
in
the
set.
//
Loop
over
particles
to
be
the
first
one
in
the
set.
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
59bd8d19
...
@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
...
@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
real
energy
=
0
.0f
;
mixed
energy
=
0
;
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
59bd8d19
...
@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
...
@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*/
*/
__kernel
void
calculateEwaldCosSinSums
(
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
__kernel
void
calculateEwaldCosSinSums
(
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
const
unsigned
int
totalK
=
ksizex*ksizey*ksizez
;
const
unsigned
int
totalK
=
ksizex*ksizey*ksizez
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
real
energy
=
0
.0f
;
mixed
energy
=
0
;
while
(
index
<
(
KMAX_Y-1
)
*ksizez+KMAX_Z
)
while
(
index
<
(
KMAX_Y-1
)
*ksizez+KMAX_Z
)
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
while
(
index
<
totalK
)
{
while
(
index
<
totalK
)
{
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
59bd8d19
...
@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
...
@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
#else
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
...
@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
const unsigned int tbx = get_local_id(0) - tgx;
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/gbsaObcReductions.cl
View file @
59bd8d19
...
@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
...
@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
const
long*
restrict
bornForceIn,
__global
const
long*
restrict
bornForceIn,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
__global
mixed
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
real
energy
=
0
.0f
;
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Sum
the
Born
force
//
Sum
the
Born
force
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
59bd8d19
...
@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
...
@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
#else
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
...
@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
__global const ushort2* exclusionTiles) {
__global const ushort2* exclusionTiles) {
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[TILE_SIZE];
__local AtomData2 localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
59bd8d19
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
...
@@ -429,6 +429,6 @@ __kernel void computeNonbonded(
...
@@ -429,6 +429,6 @@ __kernel void computeNonbonded(
pos++
;
pos++
;
}
}
#
ifdef
INCLUDE_ENERGY
#
ifdef
INCLUDE_ENERGY
energyBuffer[get_global_id
(
0
)
]
+=
(
real
)
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
#
endif
#
endif
}
}
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
59bd8d19
...
@@ -19,7 +19,7 @@ __kernel void computeNonbonded(
...
@@ -19,7 +19,7 @@ __kernel void computeNonbonded(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
...
@@ -27,7 +27,7 @@ __kernel void computeNonbonded(
...
@@ -27,7 +27,7 @@ __kernel void computeNonbonded(
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[TILE_SIZE]
;
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/pme.cl
View file @
59bd8d19
...
@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
...
@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
}
}
}
}
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
real
* restrict energyBuffer,
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
mixed
* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real
energy = 0;
mixed
energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGk.cu
View file @
59bd8d19
...
@@ -23,8 +23,8 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum,
...
@@ -23,8 +23,8 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum,
/**
/**
* Apply the surface area term to the force and energy.
* Apply the surface area term to the force and energy.
*/
*/
extern
"C"
__global__
void
computeSurfaceAreaForce
(
long
long
*
__restrict__
bornForce
,
real
*
__restrict__
energyBuffer
,
const
float2
*
__restrict__
params
,
const
real
*
__restrict__
bornRadii
)
{
extern
"C"
__global__
void
computeSurfaceAreaForce
(
long
long
*
__restrict__
bornForce
,
mixed
*
__restrict__
energyBuffer
,
const
float2
*
__restrict__
params
,
const
real
*
__restrict__
bornRadii
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
bornRadius
=
bornRadii
[
index
];
real
bornRadius
=
bornRadii
[
index
];
float
radius
=
params
[
index
].
x
;
float
radius
=
params
[
index
].
x
;
...
@@ -216,7 +216,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
...
@@ -216,7 +216,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
* Compute electrostatic interactions.
* Compute electrostatic interactions.
*/
*/
extern
"C"
__global__
void
computeGKForces
(
extern
"C"
__global__
void
computeGKForces
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
real
*
__restrict__
labFrameDipole
,
const
real4
*
__restrict__
posq
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
bornRadii
,
unsigned
long
long
*
__restrict__
bornForce
)
{
const
real
*
__restrict__
bornRadii
,
unsigned
long
long
*
__restrict__
bornForce
)
{
...
@@ -225,7 +225,7 @@ extern "C" __global__ void computeGKForces(
...
@@ -225,7 +225,7 @@ extern "C" __global__ void computeGKForces(
const
unsigned
int
numTiles
=
numTileIndices
;
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
(
unsigned
int
)
(
startTileIndex
+
warp
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
pos
=
(
unsigned
int
)
(
startTileIndex
+
warp
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
end
=
(
unsigned
int
)
(
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
end
=
(
unsigned
int
)
(
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData2
localData
[
GK_FORCE_THREAD_BLOCK_SIZE
];
__shared__
AtomData2
localData
[
GK_FORCE_THREAD_BLOCK_SIZE
];
do
{
do
{
...
@@ -605,7 +605,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
...
@@ -605,7 +605,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
* Compute electrostatic interactions.
* Compute electrostatic interactions.
*/
*/
extern
"C"
__global__
void
computeEDiffForce
(
extern
"C"
__global__
void
computeEDiffForce
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
...
@@ -615,7 +615,7 @@ extern "C" __global__ void computeEDiffForce(
...
@@ -615,7 +615,7 @@ extern "C" __global__ void computeEDiffForce(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData4
localData
[
EDIFF_THREAD_BLOCK_SIZE
];
__shared__
AtomData4
localData
[
EDIFF_THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaWcaForce.cu
View file @
59bd8d19
...
@@ -191,14 +191,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real rmi
...
@@ -191,14 +191,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real rmi
/**
/**
* Compute WCA interaction.
* Compute WCA interaction.
*/
*/
extern
"C"
__global__
void
computeWCAForce
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
computeWCAForce
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
float2
*
__restrict__
radiusEpsilon
)
{
const
real4
*
__restrict__
posq
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
float2
*
__restrict__
radiusEpsilon
)
{
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
numTiles
=
numTileIndices
;
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
(
unsigned
int
)
(
startTileIndex
+
warp
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
pos
=
(
unsigned
int
)
(
startTileIndex
+
warp
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
end
=
(
unsigned
int
)
(
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
unsigned
int
end
=
(
unsigned
int
)
(
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
do
{
do
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
View file @
59bd8d19
...
@@ -54,7 +54,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
...
@@ -54,7 +54,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
return
(
x
&&
y
?
0.0
f
:
(
x
&&
p
?
0.5
f
:
1.0
f
));
return
(
x
&&
y
?
0.0
f
:
(
x
&&
p
?
0.5
f
:
1.0
f
));
}
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
)
{
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
mixed
&
energy
)
{
// Compute the displacement.
// Compute the displacement.
real3
delta
;
real3
delta
;
...
@@ -374,7 +374,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
...
@@ -374,7 +374,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
* Compute electrostatic interactions.
* Compute electrostatic interactions.
*/
*/
extern
"C"
__global__
void
computeElectrostatics
(
extern
"C"
__global__
void
computeElectrostatics
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -388,7 +388,7 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -388,7 +388,7 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
View file @
59bd8d19
...
@@ -874,14 +874,14 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -874,14 +874,14 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
}
}
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
real
multipole
[
10
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
...
@@ -956,7 +956,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
...
@@ -956,7 +956,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
}
}
extern
"C"
__global__
void
computeInducedDipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
extern
"C"
__global__
void
computeInducedDipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
phid
,
const
real
*
__restrict__
phip
,
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
phid
,
const
real
*
__restrict__
phip
,
...
@@ -967,7 +967,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
...
@@ -967,7 +967,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
59bd8d19
...
@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
...
@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
}
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
mixed
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
// Compute the displacement.
// Compute the displacement.
real3
delta
;
real3
delta
;
...
@@ -411,7 +411,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
...
@@ -411,7 +411,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
/**
/**
* Compute the self energy and self torque.
* Compute the self energy and self torque.
*/
*/
__device__
void
computeSelfEnergyAndTorque
(
AtomData
&
atom1
,
real
&
energy
)
{
__device__
void
computeSelfEnergyAndTorque
(
AtomData
&
atom1
,
mixed
&
energy
)
{
real
cii
=
atom1
.
q
*
atom1
.
q
;
real
cii
=
atom1
.
q
*
atom1
.
q
;
real3
dipole
=
make_real3
(
atom1
.
sphericalDipole
.
y
,
atom1
.
sphericalDipole
.
z
,
atom1
.
sphericalDipole
.
x
);
real3
dipole
=
make_real3
(
atom1
.
sphericalDipole
.
y
,
atom1
.
sphericalDipole
.
z
,
atom1
.
sphericalDipole
.
x
);
real
dii
=
dot
(
dipole
,
dipole
+
atom1
.
inducedDipole
);
real
dii
=
dot
(
dipole
,
dipole
+
atom1
.
inducedDipole
);
...
@@ -439,7 +439,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
...
@@ -439,7 +439,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
* Compute electrostatic interactions.
* Compute electrostatic interactions.
*/
*/
extern
"C"
__global__
void
computeElectrostatics
(
extern
"C"
__global__
void
computeElectrostatics
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -453,7 +453,7 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -453,7 +453,7 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment