Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8ae2eba5
Commit
8ae2eba5
authored
Aug 12, 2015
by
Robert McGibbon
Browse files
Fix OpenCL platform on low-end devices
parent
7ee7de16
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
31 deletions
+31
-31
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+11
-11
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+4
-4
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+8
-8
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+8
-8
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
8ae2eba5
...
@@ -130,7 +130,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -130,7 +130,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// This will be less than the wavefront width since it takes several
// This will be less than the wavefront width since it takes several
// cycles to execute the full wavefront.
// cycles to execute the full wavefront.
// The SIMD instruction width is the VLIW instruction width (or 1 for scalar),
// The SIMD instruction width is the VLIW instruction width (or 1 for scalar),
// this is the number of ALUs that can be executing per instruction per thread.
// this is the number of ALUs that can be executing per instruction per thread.
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_WIDTH_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_WIDTH_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
>
();
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
>
();
...
@@ -342,9 +342,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -342,9 +342,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"EXP"
]
=
"exp"
;
compilationDefines
[
"EXP"
]
=
"exp"
;
compilationDefines
[
"LOG"
]
=
"log"
;
compilationDefines
[
"LOG"
]
=
"log"
;
}
}
// Set defines for applying periodic boundary conditions.
// Set defines for applying periodic boundary conditions.
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
...
@@ -392,11 +392,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -392,11 +392,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
}
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
// Create utilities objects.
// Create utilities objects.
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
...
@@ -512,7 +512,7 @@ string OpenCLContext::replaceStrings(const string& input, const std::map<std::st
...
@@ -512,7 +512,7 @@ string OpenCLContext::replaceStrings(const string& input, const std::map<std::st
if
(
index
!=
result
.
npos
)
{
if
(
index
!=
result
.
npos
)
{
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
// We have found a complete symbol, not part of a longer symbol.
// We have found a complete symbol, not part of a longer symbol.
result
.
replace
(
index
,
size
,
iter
->
second
);
result
.
replace
(
index
,
size
,
iter
->
second
);
index
+=
iter
->
second
.
size
();
index
+=
iter
->
second
.
size
();
}
}
...
@@ -797,7 +797,7 @@ private:
...
@@ -797,7 +797,7 @@ private:
void
OpenCLContext
::
findMoleculeGroups
()
{
void
OpenCLContext
::
findMoleculeGroups
()
{
// The first time this is called, we need to identify all the molecules in the system.
// The first time this is called, we need to identify all the molecules in the system.
if
(
moleculeGroups
.
size
()
==
0
)
{
if
(
moleculeGroups
.
size
()
==
0
)
{
// Add a ForceInfo that makes sure reordering doesn't break virtual sites.
// Add a ForceInfo that makes sure reordering doesn't break virtual sites.
...
@@ -879,7 +879,7 @@ void OpenCLContext::findMoleculeGroups() {
...
@@ -879,7 +879,7 @@ void OpenCLContext::findMoleculeGroups() {
if
(
!
forces
[
k
]
->
areParticlesIdentical
(
mol
.
atoms
[
i
],
mol2
.
atoms
[
i
]))
if
(
!
forces
[
k
]
->
areParticlesIdentical
(
mol
.
atoms
[
i
],
mol2
.
atoms
[
i
]))
identical
=
false
;
identical
=
false
;
}
}
// See if the constraints are identical.
// See if the constraints are identical.
for
(
int
i
=
0
;
i
<
(
int
)
mol
.
constraints
.
size
()
&&
identical
;
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
mol
.
constraints
.
size
()
&&
identical
;
i
++
)
{
...
@@ -960,11 +960,11 @@ void OpenCLContext::invalidateMolecules() {
...
@@ -960,11 +960,11 @@ void OpenCLContext::invalidateMolecules() {
}
}
if
(
valid
)
if
(
valid
)
return
;
return
;
// The list of which molecules are identical is no longer valid. We need to restore the
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
// again.
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldPosq
(
paddedNumAtoms
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
8ae2eba5
...
@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
...
@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions
.
resize
(
context
.
getNumAtoms
());
atomExclusions
.
resize
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
atomExclusions
[
i
].
push_back
(
i
);
atomExclusions
[
i
].
push_back
(
i
);
...
@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
// Build a list of tiles that contain exclusions.
// Build a list of tiles that contain exclusions.
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
@@ -410,7 +410,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -410,7 +410,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
// We are using a cutoff, and the kernels have already been created.
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
...
@@ -491,7 +491,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -491,7 +491,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
groupSize
-=
32
;
if
(
groupSize
<
32
)
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
8ae2eba5
...
@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
...
@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
...
@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
...
@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
...
@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
}
}
else {
else {
// This is an off-diagonal tile.
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
real4 tempPosq = posq[j];
...
@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
...
@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
PRUNE_BY_CUTOFF
#
ifdef
PRUNE_BY_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
...
@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
...
@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
}
}
else
else
skipTiles[get_local_id(0)] = end;
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
currentSkipIndex = tbx;
SYNC_WARPS;
SYNC_WARPS;
}
}
...
@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
...
@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
...
@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
8ae2eba5
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
...
@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
...
@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[TILE_SIZE]
;
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
...
@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
while (pos < end) {
while (pos < end) {
const bool hasExclusions = false;
const bool hasExclusions = false;
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
unsigned int atom2 = j;
unsigned int atom2 = j;
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment