Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6c6d68d2
Commit
6c6d68d2
authored
May 26, 2016
by
peastman
Browse files
Merge pull request #1499 from peastman/reallocneighbors
Improved method for resizing neighbor list
parents
c2be00a6
6cc2edd1
Changes
26
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
401 additions
and
383 deletions
+401
-383
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+13
-1
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+2
-0
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+0
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+3
-0
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+20
-15
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+21
-23
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+26
-28
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+47
-51
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+29
-28
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+13
-1
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+3
-0
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+0
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+3
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+21
-18
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+28
-30
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+28
-30
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+56
-60
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+44
-48
No files found.
platforms/cuda/include/CudaContext.h
View file @
6c6d68d2
...
...
@@ -326,6 +326,18 @@ public:
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool
getForcesValid
()
const
{
return
forcesValid
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void
setForcesValid
(
bool
valid
)
{
forcesValid
=
valid
;
}
/**
* Get the number of atoms.
*/
...
...
@@ -572,7 +584,7 @@ private:
int
paddedNumAtoms
;
int
numAtomBlocks
;
int
numThreadBlocks
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
,
boxIsTriclinic
,
hasCompilerKernel
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
,
boxIsTriclinic
,
hasCompilerKernel
,
forcesValid
;
std
::
string
compiler
,
tempDir
,
cacheDir
,
gpuArchitecture
;
float4
periodicBoxVecXFloat
,
periodicBoxVecYFloat
,
periodicBoxVecZFloat
,
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
double4
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
,
periodicBoxSize
,
invPeriodicBoxSize
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
6c6d68d2
...
...
@@ -269,6 +269,8 @@ private:
CudaArray
*
oldPositions
;
CudaArray
*
rebuildNeighborList
;
CudaSort
*
blockSorter
;
CUevent
downloadCountEvent
;
int
*
pinnedCountBuffer
;
std
::
vector
<
void
*>
forceArgs
,
findBlockBoundsArgs
,
sortBoxDataArgs
,
findInteractingBlocksArgs
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
6c6d68d2
...
...
@@ -1131,7 +1131,6 @@ void CudaContext::reorderAtoms() {
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
else
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
nonbonded
->
updateNeighborListSize
();
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
6c6d68d2
...
...
@@ -94,6 +94,7 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void
CudaCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cu
.
setForcesValid
(
true
);
cu
.
setAsCurrent
();
cu
.
clearAutoclearBuffers
();
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
...
...
@@ -125,6 +126,8 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
sum
+=
energy
[
i
];
}
}
if
(
!
cu
.
getForcesValid
())
valid
=
false
;
return
sum
;
}
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
6c6d68d2
...
...
@@ -65,12 +65,14 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuEventCreate
(
&
downloadCountEvent
,
0
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedCountBuffer
,
sizeof
(
int
),
CU_MEMHOSTALLOC_PORTABLE
));
numForceThreadBlocks
=
4
*
multiprocessors
;
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
}
...
...
@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedCountBuffer
);
cuEventDestroy
(
downloadCountEvent
);
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
...
@@ -375,17 +380,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
interactionCount
->
download
(
pinnedCountBuffer
,
false
);
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -398,20 +400,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
if
(
useCutoff
&&
numTiles
>
0
)
{
cuEventSynchronize
(
downloadCountEvent
);
updateNeighborListSize
();
}
}
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
maxTiles
)
if
(
pinnedCountBuffer
[
0
]
<=
(
unsigned
int
)
maxTiles
)
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
maxTiles
=
(
int
)
(
1.2
*
pinned
CountBuffer
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
...
...
@@ -428,6 +432,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
}
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
6c6d68d2
...
...
@@ -168,6 +168,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -191,16 +193,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
...
@@ -223,7 +221,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
6c6d68d2
...
...
@@ -146,6 +146,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -167,16 +169,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
...
@@ -199,7 +197,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
6c6d68d2
...
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -225,16 +227,12 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
...
@@ -257,7 +255,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -580,16 +580,12 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
...
@@ -612,7 +608,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
6c6d68d2
...
...
@@ -117,7 +117,9 @@ extern "C" __global__ void computeNonbonded(
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#endif
// First loop: process tiles that contain exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
...
...
@@ -309,8 +311,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
...
...
@@ -335,16 +340,12 @@ extern "C" __global__ void computeNonbonded(
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
}
else
#endif
{
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
...
@@ -367,7 +368,7 @@ extern "C" __global__ void computeNonbonded(
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
...
...
platforms/opencl/include/OpenCLContext.h
View file @
6c6d68d2
...
...
@@ -408,6 +408,18 @@ public:
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool
getForcesValid
()
const
{
return
forcesValid
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void
setForcesValid
(
bool
valid
)
{
forcesValid
=
valid
;
}
/**
* Get the number of atoms.
*/
...
...
@@ -684,7 +696,7 @@ private:
int
numThreadBlocks
;
int
numForceBuffers
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
,
forcesValid
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
6c6d68d2
...
...
@@ -281,6 +281,9 @@ private:
OpenCLArray
*
oldPositions
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLSort
*
blockSorter
;
cl
::
Event
downloadCountEvent
;
cl
::
Buffer
*
pinnedCountBuffer
;
int
*
pinnedCountMemory
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
6c6d68d2
...
...
@@ -1052,7 +1052,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
nonbonded
->
updateNeighborListSize
();
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
6c6d68d2
...
...
@@ -118,6 +118,7 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cl.setForcesValid(true);
cl.clearAutoclearBuffers();
for (vector<OpenCLContext::ForcePreComputation*>::iterator iter = cl.getPreComputations().begin(); iter != cl.getPreComputations().end(); ++iter)
(*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
...
...
@@ -149,6 +150,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
sum += energy[i];
}
}
if (!cl.getForcesValid())
valid = false;
return sum;
}
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
6c6d68d2
...
...
@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks and force buffers to use.
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
...
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
}
}
pinnedCountBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
sizeof
(
int
));
pinnedCountMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
pinnedCountBuffer
,
CL_TRUE
,
CL_MAP_READ
,
0
,
sizeof
(
int
));
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
...
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
delete
pinnedCountBuffer
;
}
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
...
@@ -357,8 +361,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
...
...
@@ -367,10 +369,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -385,20 +385,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
if
(
useCutoff
&&
numTiles
>
0
)
{
downloadCountEvent
.
wait
();
updateNeighborListSize
();
}
}
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
int
maxTiles
=
(
int
)
(
1.2
*
pinned
CountMemory
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
...
...
@@ -430,6 +432,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
}
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
6c6d68d2
...
...
@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -204,16 +206,12 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -239,7 +237,7 @@ __kernel void computeN2Energy(
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
6c6d68d2
...
...
@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -220,16 +222,12 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -248,7 +246,7 @@ __kernel void computeN2Energy(
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
6c6d68d2
...
...
@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -178,16 +180,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -213,7 +211,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
6c6d68d2
...
...
@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -188,16 +190,12 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -216,7 +214,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
6c6d68d2
...
...
@@ -169,6 +169,8 @@ __kernel void computeBornSum(
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
if
(
numTiles
>
maxTiles
)
return
; // There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
int
end
=
(
int
)
((
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
#
else
...
...
@@ -190,16 +192,12 @@ __kernel void computeBornSum(
int
x,
y
;
bool
singlePeriodicCopy
=
false
;
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
}
else
#
endif
{
#
else
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
...
...
@@ -225,7 +223,7 @@ __kernel void computeBornSum(
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
...
...
@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -577,16 +577,12 @@ __kernel void computeGBSAForce1(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -612,7 +608,7 @@ __kernel void computeGBSAForce1(
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
6c6d68d2
...
...
@@ -178,6 +178,8 @@ __kernel void computeBornSum(
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
if
(
numTiles
>
maxTiles
)
return
; // There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
))
;
int
end
=
(
int
)
((
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
))
;
#
else
...
...
@@ -196,16 +198,12 @@ __kernel void computeBornSum(
int
x,
y
;
bool
singlePeriodicCopy
=
false
;
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
}
else
#
endif
{
#
else
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
...
...
@@ -224,7 +222,7 @@ __kernel void computeBornSum(
nextToSkip = end;
}
includeTile = (nextToSkip != pos);
}
#endif
if (includeTile) {
// Load the data for this tile.
...
...
@@ -593,6 +591,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -611,16 +611,12 @@ __kernel void computeGBSAForce1(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
@@ -639,7 +635,7 @@ __kernel void computeGBSAForce1(
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
}
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment