Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
69e75377
Commit
69e75377
authored
Oct 07, 2011
by
Peter Eastman
Browse files
Added "const" and "restrict" to lots of kernel arguments to let the compiler do more optimizations
parent
bf8b9f30
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
88 additions
and
84 deletions
+88
-84
platforms/opencl/src/kernels/gbsaObcReductions.cl
platforms/opencl/src/kernels/gbsaObcReductions.cl
+5
-5
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+7
-7
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+7
-7
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+10
-10
platforms/opencl/src/kernels/langevin.cl
platforms/opencl/src/kernels/langevin.cl
+5
-5
platforms/opencl/src/kernels/monteCarloBarostat.cl
platforms/opencl/src/kernels/monteCarloBarostat.cl
+2
-2
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+3
-3
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+3
-3
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+5
-5
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+12
-8
platforms/opencl/src/kernels/pme_cpu.cl
platforms/opencl/src/kernels/pme_cpu.cl
+6
-6
platforms/opencl/src/kernels/random.cl
platforms/opencl/src/kernels/random.cl
+1
-1
platforms/opencl/src/kernels/removeCM.cl
platforms/opencl/src/kernels/removeCM.cl
+2
-2
platforms/opencl/src/kernels/settle.cl
platforms/opencl/src/kernels/settle.cl
+1
-1
platforms/opencl/src/kernels/shakeHydrogens.cl
platforms/opencl/src/kernels/shakeHydrogens.cl
+1
-1
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+6
-6
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+9
-9
platforms/opencl/src/kernels/verlet.cl
platforms/opencl/src/kernels/verlet.cl
+3
-3
No files found.
platforms/opencl/src/kernels/gbsaObcReductions.cl
View file @
69e75377
...
...
@@ -8,11 +8,11 @@
__kernel
void
reduceBornSum
(
int
bufferSize,
int
numBuffers,
float
alpha,
float
beta,
float
gamma,
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
bornSum,
__global
const
long*
restrict
bornSum,
#
else
__global
float*
bornSum,
__global
const
float*
restrict
bornSum,
#
endif
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
__global
const
float2*
restrict
params,
__global
float*
restrict
bornRadii,
__global
float*
restrict
obcChain
)
{
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
//
Get
summed
Born
data
...
...
@@ -49,9 +49,9 @@ __kernel void reduceBornSum(int bufferSize, int numBuffers, float alpha, float b
__kernel
void
reduceBornForce
(
int
bufferSize,
int
numBuffers,
__global
float*
bornForce,
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
bornForceIn,
__global
const
long*
restrict
bornForceIn,
#
endif
__global
float*
energyBuffer,
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
__global
float*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
float*
restrict
bornRadii,
__global
const
float*
restrict
obcChain
)
{
float
energy
=
0.0f
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
69e75377
...
...
@@ -14,10 +14,10 @@ typedef struct {
*
Compute
the
Born
sum.
*/
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__kernel
void
computeBornSum
(
__global
float*
restrict
global_bornSum,
__global
const
float4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__local
AtomData*
restrict
localData,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
)
{
#
else
unsigned
int
numTiles
)
{
#
endif
...
...
@@ -190,11 +190,11 @@ __kernel void computeBornSum(__global float* global_bornSum, __global float4* po
* First part of computing the GBSA interaction.
*/
__kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuffer,
__global float4* posq, __global
float*
global_bornRadii, __global float* global_bornForce,
__local AtomData* localData, __local float4* tempBuffer,
__kernel void computeGBSAForce1(__global float4*
restrict
forceBuffers, __global float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq, __global
const float* restrict
global_bornRadii, __global float*
restrict
global_bornForce,
__local AtomData*
restrict
localData, __local float4*
restrict
tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags) {
__global
const
ushort2*
restrict
tiles, __global
const
unsigned int*
restrict
interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global
const
unsigned int*
restrict
interactionFlags) {
#else
unsigned int numTiles) {
#endif
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
69e75377
...
...
@@ -12,10 +12,10 @@ typedef struct {
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData1*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
restrict
global_bornSum,
__global
const
float4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__local
AtomData1*
restrict
localData,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
)
{
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
)
{
#
else
unsigned
int
numTiles
)
{
#
endif
...
...
@@ -205,11 +205,11 @@ typedef struct {
*/
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeGBSAForce1(__global float4* forceBuffers, __global float* global_bornForce,
__global float* energyBuffer, __global float4* posq, __global float* global_bornRadii,
__local AtomData2* localData, __local float4* tempBuffer,
void computeGBSAForce1(__global float4*
restrict
forceBuffers, __global float*
restrict
global_bornForce,
__global float*
restrict
energyBuffer, __global
const
float4*
restrict
posq, __global
const
float*
restrict
global_bornRadii,
__local AtomData2*
restrict
localData, __local float4*
restrict
tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles) {
__global
const
ushort2*
restrict
tiles, __global
const
unsigned int*
restrict
interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles) {
#else
unsigned int numTiles) {
#endif
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
69e75377
...
...
@@ -16,14 +16,14 @@ typedef struct {
*/
__kernel
void
computeBornSum
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
global_bornSum,
__global
long*
restrict
global_bornSum,
#
else
__global
float*
global_bornSum,
__global
float*
restrict
global_bornSum,
#
endif
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData1*
localData,
__local
float*
tempBuffer,
__global
const
float4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__local
AtomData1*
restrict
localData,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags,
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags,
#
else
unsigned
int
numTiles,
#
endif
...
...
@@ -337,14 +337,14 @@ typedef struct {
__kernel void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* forceBuffers, __global long* global_bornForce,
__global long*
restrict
forceBuffers, __global long*
restrict
global_bornForce,
#else
__global float4* forceBuffers, __global float* global_bornForce,
__global float4*
restrict
forceBuffers, __global float*
restrict
global_bornForce,
#endif
__global float* energyBuffer, __global float4* posq, __global float* global_bornRadii,
__local AtomData2* localData, __local float4* tempBuffer,
__global float*
restrict
energyBuffer, __global
const
float4*
restrict
posq, __global
const
float*
restrict
global_bornRadii,
__local AtomData2*
restrict
localData, __local float4*
restrict
tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags,
__global
const
ushort2*
restrict
tiles, __global
const
unsigned int*
restrict
interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global
const
unsigned int*
restrict
interactionFlags,
#else
unsigned int numTiles,
#endif
...
...
platforms/opencl/src/kernels/langevin.cl
View file @
69e75377
...
...
@@ -8,8 +8,8 @@ enum {VelScale, ForceScale, NoiseScale, MaxParams};
*
Perform
the
first
step
of
Langevin
integration.
*/
__kernel
void
integrateLangevinPart1
(
__global
float4*
velm,
__global
float4*
force,
__global
float4*
posDelta,
__global
float*
paramBuffer,
__global
float2*
dt,
__global
float4*
random,
unsigned
int
randomIndex
)
{
__kernel
void
integrateLangevinPart1
(
__global
float4*
restrict
velm,
__global
const
float4*
restrict
force,
__global
float4*
restrict
posDelta,
__global
const
float*
restrict
paramBuffer,
__global
const
float2*
restrict
dt,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
float
vscale
=
paramBuffer[VelScale]
;
float
fscale
=
paramBuffer[ForceScale]
;
float
noisescale
=
paramBuffer[NoiseScale]
;
...
...
@@ -31,7 +31,7 @@ __kernel void integrateLangevinPart1(__global float4* velm, __global float4* for
*
Perform
the
second
step
of
Langevin
integration.
*/
__kernel
void
integrateLangevinPart2
(
__global
float4*
posq,
__global
float4*
posDelta,
__global
float4*
velm,
__global
float2*
dt
)
{
__kernel
void
integrateLangevinPart2
(
__global
float4*
restrict
posq,
__global
const
float4*
restrict
posDelta,
__global
float4*
restrict
velm,
__global
const
float2*
restrict
dt
)
{
#
ifdef
cl_khr_fp64
double
invStepSize
=
1.0/dt[0].y
;
#
else
...
...
@@ -58,8 +58,8 @@ __kernel void integrateLangevinPart2(__global float4* posq, __global float4* pos
*
Select
the
step
size
to
use
for
the
next
step.
*/
__kernel
void
selectLangevinStepSize
(
float
maxStepSize,
float
errorTol,
float
tau,
float
kT,
__global
float2*
dt,
__global
float4*
velm,
__global
float4*
force,
__global
float*
paramBuffer,
__local
float*
params,
__local
float*
error
)
{
__kernel
void
selectLangevinStepSize
(
float
maxStepSize,
float
errorTol,
float
tau,
float
kT,
__global
float2*
restrict
dt,
__global
const
float4*
restrict
velm,
__global
const
float4*
restrict
force,
__global
float*
restrict
paramBuffer,
__local
float*
restrict
params,
__local
float*
restrict
error
)
{
//
Calculate
the
error.
float
err
=
0.0f
;
...
...
platforms/opencl/src/kernels/monteCarloBarostat.cl
View file @
69e75377
...
...
@@ -2,8 +2,8 @@
*
Scale
the
particle
positions.
*/
__kernel
void
scalePositions
(
float
scale,
int
numMolecules,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
int*
moleculeAtoms,
__global
int*
moleculeStartIndex
)
{
__kernel
void
scalePositions
(
float
scale,
int
numMolecules,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
restrict
posq,
__global
const
int*
restrict
moleculeAtoms,
__global
const
int*
restrict
moleculeStartIndex
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < numMolecules; index += get_global_size(0)) {
int
first
=
moleculeStartIndex[index]
;
int
last
=
moleculeStartIndex[index+1]
;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
69e75377
...
...
@@ -11,11 +11,11 @@ typedef struct {
*
Compute
nonbonded
interactions.
*/
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__kernel
void
computeNonbonded
(
__global
float4*
restrict
forceBuffers,
__global
float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
AtomData*
restrict
localData,
__local
float4*
restrict
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
69e75377
...
...
@@ -12,11 +12,11 @@ typedef struct {
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
void
computeNonbonded
(
__global
float4*
restrict
forceBuffers,
__global
float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
AtomData*
restrict
localData,
__local
float4*
restrict
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
69e75377
...
...
@@ -16,15 +16,15 @@ typedef struct {
*/
__kernel
void
computeNonbonded
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
forceBuffers,
__global
long*
restrict
forceBuffers,
#
else
__global
float4*
forceBuffers,
__global
float4*
restrict
forceBuffers,
#
endif
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__global
float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
AtomData*
restrict
localData,
__local
float*
restrict
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/pme.cl
View file @
69e75377
__kernel
void
updateBsplines
(
__global
float4*
posq,
__global
float4*
pmeBsplineTheta,
__local
float4*
bsplinesCache,
__global
int2*
pmeAtomGridIndex,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
updateBsplines
(
__global
const
float4*
restrict
posq,
__global
float4*
restrict
pmeBsplineTheta,
__local
float4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
const
float4
scale
=
1.0f/
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
float4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
...
@@ -38,7 +39,7 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
/**
*
For
each
grid
point,
find
the
range
of
sorted
atoms
associated
with
that
point.
*/
__kernel
void
findAtomRangeForGrid
(
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float4*
posq,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
findAtomRangeForGrid
(
__global
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
float4*
restrict
posq,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
int
last
=
(
start
==
0
?
-1
:
pmeAtomGridIndex[start-1].y
)
;
...
...
@@ -75,7 +76,8 @@ __kernel void findAtomRangeForGrid(__global int2* pmeAtomGridIndex, __global int
#
define
BUFFER_SIZE
(
PME_ORDER*PME_ORDER*PME_ORDER
)
__kernel
__attribute__
((
reqd_work_group_size
(
BUFFER_SIZE,
1
,
1
)))
__kernel
void
gridSpreadCharge
(
__global
float4*
posq,
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
long*
pmeGrid,
__global
float4*
pmeBsplineTheta,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
void
gridSpreadCharge
(
__global
const
float4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
float4*
restrict
pmeBsplineTheta,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
int
ix
=
get_local_id
(
0
)
/
(
PME_ORDER*PME_ORDER
)
;
int
remainder
=
get_local_id
(
0
)
-ix*PME_ORDER*PME_ORDER
;
int
iy
=
remainder/PME_ORDER
;
...
...
@@ -122,7 +124,7 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
}
}
__kernel
void
finishSpreadCharge
(
__global
long*
pmeGrid
)
{
__kernel
void
finishSpreadCharge
(
__global
long*
restrict
pmeGrid
)
{
__global
float2*
floatGrid
=
(
__global
float2*
)
pmeGrid
;
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
scale
=
EPSILON_FACTOR/
(
float
)
0xFFFFFFFF
;
...
...
@@ -133,7 +135,8 @@ __kernel void finishSpreadCharge(__global long* pmeGrid) {
}
}
#
else
__kernel
void
gridSpreadCharge
(
__global
float4*
posq,
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float2*
pmeGrid,
__global
float4*
pmeBsplineTheta
)
{
__kernel
void
gridSpreadCharge
(
__global
const
float4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
float2*
restrict
pmeGrid,
__global
const
float4*
restrict
pmeBsplineTheta
)
{
unsigned
int
numGridPoints
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
for
(
int
gridIndex
=
get_global_id
(
0
)
; gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
//
Compute
the
charge
on
a
grid
point.
...
...
@@ -190,8 +193,8 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
}
#
endif
__kernel
void
reciprocalConvolution
(
__global
float2*
pmeGrid,
__global
float*
energyBuffer,
__global
float*
pmeBsplineModuliX,
__global
float*
pmeBsplineModuliY,
__global
float*
pmeBsplineModuliZ,
float4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
__kernel
void
reciprocalConvolution
(
__global
float2*
restrict
pmeGrid,
__global
float*
restrict
energyBuffer,
__global
const
float*
restrict
pmeBsplineModuliX,
__global
const
float*
restrict
pmeBsplineModuliY,
__global
const
float*
restrict
pmeBsplineModuliZ,
float4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
energy
=
0.0f
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
...
...
@@ -220,7 +223,8 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
__kernel
void
gridInterpolateForce
(
__global
float4*
posq,
__global
float4*
forceBuffers,
__global
float2*
pmeGrid,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__local
float4*
bsplinesCache
)
{
__kernel
void
gridInterpolateForce
(
__global
const
float4*
restrict
posq,
__global
float4*
restrict
forceBuffers,
__global
const
float2*
restrict
pmeGrid,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__local
float4*
restrict
bsplinesCache
)
{
const
float4
scale
=
1.0f/
(
PME_ORDER-1
)
;
__local
float4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
float4*
ddata
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER
+
get_local_size
(
0
)
*PME_ORDER]
;
...
...
platforms/opencl/src/kernels/pme_cpu.cl
View file @
69e75377
__kernel
void
updateBsplines
(
__global
float4*
posq,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__local
float4*
bsplinesCache,
__global
int2*
pmeAtomGridIndex,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
updateBsplines
(
__global
const
float4*
restrict
posq,
__global
float4*
restrict
pmeBsplineTheta,
__global
float4*
restrict
pmeBsplineDTheta,
__local
float4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
const
float4
scale
=
1.0f/
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
float4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
...
@@ -42,10 +42,10 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
/**
*
This
kernel
is
not
actually
used
when
running
on
a
CPU.
*/
__kernel
void
findAtomRangeForGrid
(
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float4*
posq,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
findAtomRangeForGrid
(
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
float4*
restrict
posq,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
}
__kernel
void
gridSpreadCharge
(
__global
float4*
posq,
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float2*
pmeGrid,
__global
float4*
pmeBsplineTheta,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
gridSpreadCharge
(
__global
const
float4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
float2*
restrict
pmeGrid,
__global
const
float4*
restrict
pmeBsplineTheta,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
for
(
int
gridIndex
=
firstx*GRID_SIZE_Y*GRID_SIZE_Z
; gridIndex < lastx*GRID_SIZE_Y*GRID_SIZE_Z; gridIndex++)
...
...
@@ -82,8 +82,8 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
}
}
__kernel
void
reciprocalConvolution
(
__global
float2*
pmeGrid,
__global
float*
energyBuffer,
__global
float*
pmeBsplineModuliX,
__global
float*
pmeBsplineModuliY,
__global
float*
pmeBsplineModuliZ,
float4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
__kernel
void
reciprocalConvolution
(
__global
float2*
restrict
pmeGrid,
__global
float*
restrict
energyBuffer,
__global
const
float*
restrict
pmeBsplineModuliX,
__global
const
float*
restrict
pmeBsplineModuliY,
__global
const
float*
restrict
pmeBsplineModuliZ,
float4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
energy
=
0.0f
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
...
...
@@ -112,7 +112,7 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
__kernel
void
gridInterpolateForce
(
__global
float4*
posq,
__global
float4*
forceBuffers,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__global
float2*
pmeGrid,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__kernel
void
gridInterpolateForce
(
__global
const
float4*
restrict
posq,
__global
float4*
restrict
forceBuffers,
__global
const
float4*
restrict
pmeBsplineTheta,
__global
const
float4*
restrict
pmeBsplineDTheta,
__global
const
float2*
restrict
pmeGrid,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0)) {
float4
force
=
0.0f
;
float4
pos
=
posq[atom]
;
...
...
platforms/opencl/src/kernels/random.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Generate
random
numbers
*/
__kernel
void
generateRandomNumbers
(
int
numValues,
__global
float4*
random,
__global
uint4*
seed
)
{
__kernel
void
generateRandomNumbers
(
int
numValues,
__global
float4*
restrict
random,
__global
uint4*
restrict
seed
)
{
int
index
=
get_global_id
(
0
)
;
uint4
state
=
seed[index]
;
unsigned
int
carry
=
0
;
...
...
platforms/opencl/src/kernels/removeCM.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Calculate
the
center
of
mass
momentum.
*/
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
velm,
__global
float4*
cmMomentum,
__local
float4*
temp
)
{
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
int
index
=
get_global_id
(
0
)
;
float4
cm
=
0.0f
;
while
(
index
<
numAtoms
)
{
...
...
@@ -53,7 +53,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global float4* velm, __gl
*
Remove
center
of
mass
motion.
*/
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
velm,
__global
float4*
cmMomentum,
__local
float4*
temp
)
{
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
int
index
=
get_local_id
(
0
)
;
...
...
platforms/opencl/src/kernels/settle.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Enforce
constraints
on
SETTLE
clusters
*/
__kernel
void
applySettle
(
int
numClusters,
float
tol,
__global
float4*
oldPos,
__global
float4*
posDelta,
__global
float4*
newDelta,
__global
float4*
velm,
__global
int4*
clusterAtoms,
__global
float2*
clusterParams
)
{
__kernel
void
applySettle
(
int
numClusters,
float
tol,
__global
const
float4*
restrict
oldPos,
__global
const
float4*
restrict
posDelta,
__global
float4*
restrict
newDelta,
__global
const
float4*
restrict
velm,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float2*
restrict
clusterParams
)
{
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numClusters
)
{
//
Load
the
data
for
this
cluster.
...
...
platforms/opencl/src/kernels/shakeHydrogens.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Enforce
constraints
on
SHAKE
clusters
*/
__kernel
void
applyShakeToHydrogens
(
int
numClusters,
float
tol,
__global
float4*
oldPos,
__global
float4*
posDelta,
__global
float4*
newDelta,
__global
int4*
clusterAtoms,
__global
float4*
clusterParams
)
{
__kernel
void
applyShakeToHydrogens
(
int
numClusters,
float
tol,
__global
const
float4*
restrict
oldPos,
__global
const
float4*
restrict
posDelta,
__global
float4*
restrict
newDelta,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float4*
restrict
clusterParams
)
{
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numClusters
)
{
//
Load
the
data
for
this
cluster.
...
...
platforms/opencl/src/kernels/sort.cl
View file @
69e75377
...
...
@@ -8,7 +8,7 @@ float getValue(TYPE value) {
*
Calculate
the
minimum
and
maximum
value
in
the
array
to
be
sorted.
This
kernel
*
is
executed
as
a
single
work
group.
*/
__kernel
void
computeRange
(
__global
TYPE*
data,
int
length,
__global
float2*
range,
__local
float*
buffer
)
{
__kernel
void
computeRange
(
__global
const
TYPE*
restrict
data,
int
length,
__global
float2*
restrict
range,
__local
float*
restrict
buffer
)
{
float
minimum
=
MAXFLOAT
;
float
maximum
=
-MAXFLOAT
;
...
...
@@ -45,8 +45,8 @@ __kernel void computeRange(__global TYPE* data, int length, __global float2* ran
/**
*
Assign
elements
to
buckets.
*/
__kernel
void
assignElementsToBuckets
(
__global
TYPE*
data,
int
length,
int
numBuckets,
__global
float2*
range,
__global
int*
bucketOffset,
__global
int*
bucketOfElement,
__global
int*
offsetInBucket
)
{
__kernel
void
assignElementsToBuckets
(
__global
const
TYPE*
restrict
data,
int
length,
int
numBuckets,
__global
const
float2*
restrict
range,
__global
int*
bucketOffset,
__global
int*
restrict
bucketOfElement,
__global
int*
restrict
offsetInBucket
)
{
#
ifdef
AMD_ATOMIC_WORK_AROUND
//
Do
a
byte
write
to
force
all
memory
accesses
to
interactionCount
to
use
the
complete
path.
//
This
avoids
the
atomic
access
from
causing
all
word
accesses
to
other
buffers
from
using
the
slow
complete
path.
...
...
@@ -72,7 +72,7 @@ __kernel void assignElementsToBuckets(__global TYPE* data, int length, int numBu
*
Sum
the
bucket
sizes
to
compute
the
start
position
of
each
bucket.
This
kernel
*
is
executed
as
a
single
work
group.
*/
__kernel
void
computeBucketPositions
(
int
numBuckets,
__global
int*
bucketOffset,
__local
int*
buffer
)
{
__kernel
void
computeBucketPositions
(
int
numBuckets,
__global
int*
restrict
bucketOffset,
__local
int*
restrict
buffer
)
{
int
globalOffset
=
0
;
for
(
int
startBucket
=
0
; startBucket < numBuckets; startBucket += get_local_size(0)) {
//
Load
the
bucket
sizes
into
local
memory.
...
...
@@ -101,7 +101,7 @@ __kernel void computeBucketPositions(int numBuckets, __global int* bucketOffset,
/**
*
Copy
the
input
data
into
the
buckets
for
sorting.
*/
__kernel
void
copyDataToBuckets
(
__global
TYPE*
data,
__global
TYPE*
buckets,
int
length,
__global
int*
bucketOffset,
__global
int*
bucketOfElement,
__global
int*
offsetInBucket
)
{
__kernel
void
copyDataToBuckets
(
__global
const
TYPE*
restrict
data,
__global
TYPE*
restrict
buckets,
int
length,
__global
const
int*
restrict
bucketOffset,
__global
const
int*
restrict
bucketOfElement,
__global
const
int*
restrict
offsetInBucket
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < length; index += get_global_size(0)) {
TYPE
element
=
data[index]
;
int
bucketIndex
=
bucketOfElement[index]
;
...
...
@@ -113,7 +113,7 @@ __kernel void copyDataToBuckets(__global TYPE* data, __global TYPE* buckets, int
/**
*
Sort
the
data
in
each
bucket.
*/
__kernel
void
sortBuckets
(
__global
TYPE*
data,
__global
TYPE*
buckets,
int
numBuckets,
__global
int*
bucketOffset,
__local
TYPE*
buffer
)
{
__kernel
void
sortBuckets
(
__global
TYPE*
restrict
data,
__global
const
TYPE*
restrict
buckets,
int
numBuckets,
__global
const
int*
restrict
bucketOffset,
__local
TYPE*
restrict
buffer
)
{
for
(
int
index
=
get_group_id
(
0
)
; index < numBuckets; index += get_num_groups(0)) {
int
startIndex
=
(
index
==
0
?
0
:
bucketOffset[index-1]
)
;
int
endIndex
=
bucketOffset[index]
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Fill
a
buffer
with
0.
*/
__kernel
void
clearBuffer
(
__global
int*
buffer,
int
size
)
{
__kernel
void
clearBuffer
(
__global
int*
restrict
buffer,
int
size
)
{
int
index
=
get_global_id
(
0
)
;
__global
int4*
buffer4
=
(
__global
int4*
)
buffer
;
int
sizeDiv4
=
size/4
;
...
...
@@ -18,7 +18,7 @@ __kernel void clearBuffer(__global int* buffer, int size) {
/**
*
Fill
two
buffers
with
0.
*/
__kernel
void
clearTwoBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2
)
{
/**
 * Fill two buffers with 0.
 *
 * Each buffer is handed, together with its own size argument, to clearBuffer();
 * buffer1 is processed first, then buffer2.
 *
 * buffer1, buffer2: the buffers to clear
 * size1, size2:     the size value forwarded to clearBuffer() for each buffer
 */
__kernel void clearTwoBuffers(__global int* restrict buffer1, int size1,
                              __global int* restrict buffer2, int size2) {
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
}
...
...
@@ -26,7 +26,7 @@ __kernel void clearTwoBuffers(__global int* buffer1, int size1, __global int* bu
/**
*
Fill
three
buffers
with
0.
*/
__kernel
void
clearThreeBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3
)
{
__kernel
void
clearThreeBuffers
(
__global
int*
restrict
buffer1,
int
size1,
__global
int*
restrict
buffer2,
int
size2,
__global
int*
restrict
buffer3,
int
size3
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
...
...
@@ -35,7 +35,7 @@ __kernel void clearThreeBuffers(__global int* buffer1, int size1, __global int*
/**
*
Fill
four
buffers
with
0.
*/
__kernel
void
clearFourBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3,
__global
int*
buffer4,
int
size4
)
{
__kernel
void
clearFourBuffers
(
__global
int*
restrict
buffer1,
int
size1,
__global
int*
restrict
buffer2,
int
size2,
__global
int*
restrict
buffer3,
int
size3,
__global
int*
restrict
buffer4,
int
size4
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
...
...
@@ -45,7 +45,7 @@ __kernel void clearFourBuffers(__global int* buffer1, int size1, __global int* b
/**
*
Fill
five
buffers
with
0.
*/
__kernel
void
clearFiveBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3,
__global
int*
buffer4,
int
size4,
__global
int*
buffer5,
int
size5
)
{
__kernel
void
clearFiveBuffers
(
__global
int*
restrict
buffer1,
int
size1,
__global
int*
restrict
buffer2,
int
size2,
__global
int*
restrict
buffer3,
int
size3,
__global
int*
restrict
buffer4,
int
size4,
__global
int*
restrict
buffer5,
int
size5
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
...
...
@@ -56,7 +56,7 @@ __kernel void clearFiveBuffers(__global int* buffer1, int size1, __global int* b
/**
*
Fill
six
buffers
with
0.
*/
__kernel
void
clearSixBuffers
(
__global
int*
buffer1,
int
size1,
__global
int*
buffer2,
int
size2,
__global
int*
buffer3,
int
size3,
__global
int*
buffer4,
int
size4,
__global
int*
buffer5,
int
size5,
__global
int*
buffer6,
int
size6
)
{
__kernel
void
clearSixBuffers
(
__global
int*
restrict
buffer1,
int
size1,
__global
int*
restrict
buffer2,
int
size2,
__global
int*
restrict
buffer3,
int
size3,
__global
int*
restrict
buffer4,
int
size4,
__global
int*
restrict
buffer5,
int
size5,
__global
int*
restrict
buffer6,
int
size6
)
{
clearBuffer
(
buffer1,
size1
)
;
clearBuffer
(
buffer2,
size2
)
;
clearBuffer
(
buffer3,
size3
)
;
...
...
@@ -69,7 +69,7 @@ __kernel void clearSixBuffers(__global int* buffer1, int size1, __global int* bu
*
Sum
a
collection
of
buffers
into
the
first
one.
*/
__kernel
void
reduceFloat4Buffer
(
__global
float4*
buffer,
int
bufferSize,
int
numBuffers
)
{
__kernel
void
reduceFloat4Buffer
(
__global
float4*
restrict
buffer,
int
bufferSize,
int
numBuffers
)
{
int
index
=
get_global_id
(
0
)
;
int
totalSize
=
bufferSize*numBuffers
;
while
(
index
<
bufferSize
)
{
...
...
@@ -84,7 +84,7 @@ __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int nu
/**
*
Sum
the
various
buffers
containing
forces.
*/
__kernel
void
reduceForces
(
__global
long*
longBuffer,
__global
float4*
buffer,
int
bufferSize,
int
numBuffers
)
{
__kernel
void
reduceForces
(
__global
const
long*
restrict
longBuffer,
__global
float4*
restrict
buffer,
int
bufferSize,
int
numBuffers
)
{
int
totalSize
=
bufferSize*numBuffers
;
float
scale
=
1.0f/
(
float
)
0xFFFFFFFF
;
for
(
int
index
=
get_global_id
(
0
)
; index < bufferSize; index += get_global_size(0)) {
...
...
@@ -99,7 +99,7 @@ __kernel void reduceForces(__global long* longBuffer, __global float4* buffer, i
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
*/
__kernel
void
determineNativeAccuracy
(
__global
float8*
values,
int
numValues
)
{
__kernel
void
determineNativeAccuracy
(
__global
float8*
restrict
values,
int
numValues
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < numValues; i += get_global_size(0)) {
float
v
=
values[i].s0
;
values[i]
=
(
float8
)
(
v,
native_sqrt
(
v
)
,
native_rsqrt
(
v
)
,
native_recip
(
v
)
,
native_exp
(
v
)
,
native_log
(
v
)
,
0.0f,
0.0f
)
;
...
...
platforms/opencl/src/kernels/verlet.cl
View file @
69e75377
...
...
@@ -6,7 +6,7 @@
*
Perform
the
first
step
of
verlet
integration.
*/
__kernel
void
integrateVerletPart1
(
int
numAtoms,
__global
float2*
dt,
__global
float4*
posq,
__global
float4*
velm,
__global
float4*
force,
__global
float4*
posDelta
)
{
__kernel
void
integrateVerletPart1
(
int
numAtoms,
__global
const
float2*
restrict
dt,
__global
const
float4*
restrict
posq,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
force,
__global
float4*
restrict
posDelta
)
{
float2
stepSize
=
dt[0]
;
float
dtPos
=
stepSize.y
;
float
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
...
...
@@ -26,7 +26,7 @@ __kernel void integrateVerletPart1(int numAtoms, __global float2* dt, __global f
*
Perform
the
second
step
of
verlet
integration.
*/
__kernel
void
integrateVerletPart2
(
int
numAtoms,
__global
float2*
dt,
__global
float4*
posq,
__global
float4*
velm,
__global
float4*
posDelta
)
{
__kernel
void
integrateVerletPart2
(
int
numAtoms,
__global
float2*
restrict
dt,
__global
float4*
restrict
posq,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
posDelta
)
{
float2
stepSize
=
dt[0]
;
#
ifdef
cl_khr_fp64
double
oneOverDt
=
1.0/stepSize.y
;
...
...
@@ -57,7 +57,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* dt, __global f
*
Select
the
step
size
to
use
for
the
next
step.
*/
__kernel
void
selectVerletStepSize
(
int
numAtoms,
float
maxStepSize,
float
errorTol,
__global
float2*
dt,
__global
float4*
velm,
__global
float4*
force,
__local
float*
error
)
{
__kernel
void
selectVerletStepSize
(
int
numAtoms,
float
maxStepSize,
float
errorTol,
__global
float2*
restrict
dt,
__global
const
float4*
restrict
velm,
__global
const
float4*
restrict
force,
__local
float*
restrict
error
)
{
//
Calculate
the
error.
float
err
=
0.0f
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment