Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8a762772
Commit
8a762772
authored
Aug 12, 2015
by
peastman
Browse files
Merge pull request #1091 from rmcgibbo/osx-opencl
Fix OpenCL platform on low-end devices
parents
14bd3629
8ae2eba5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
31 deletions
+31
-31
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+11
-11
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+4
-4
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+8
-8
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+8
-8
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
8a762772
...
@@ -130,7 +130,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -130,7 +130,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// This will be less than the wavefront width since it takes several
// This will be less than the wavefront width since it takes several
// cycles to execute the full wavefront.
// cycles to execute the full wavefront.
// The SIMD instruction width is the VLIW instruction width (or 1 for scalar),
// The SIMD instruction width is the VLIW instruction width (or 1 for scalar),
// this is the number of ALUs that can be executing per instruction per thread.
// this is the number of ALUs that can be executing per instruction per thread.
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_WIDTH_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_WIDTH_AMD
>
()
*
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
>
();
devices
[
i
].
getInfo
<
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
>
();
...
@@ -342,9 +342,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -342,9 +342,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"EXP"
]
=
"exp"
;
compilationDefines
[
"EXP"
]
=
"exp"
;
compilationDefines
[
"LOG"
]
=
"log"
;
compilationDefines
[
"LOG"
]
=
"log"
;
}
}
// Set defines for applying periodic boundary conditions.
// Set defines for applying periodic boundary conditions.
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
...
@@ -392,11 +392,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -392,11 +392,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
}
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
// Create utilities objects.
// Create utilities objects.
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
integration
=
new
OpenCLIntegrationUtilities
(
*
this
,
system
);
...
@@ -512,7 +512,7 @@ string OpenCLContext::replaceStrings(const string& input, const std::map<std::st
...
@@ -512,7 +512,7 @@ string OpenCLContext::replaceStrings(const string& input, const std::map<std::st
if
(
index
!=
result
.
npos
)
{
if
(
index
!=
result
.
npos
)
{
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
// We have found a complete symbol, not part of a longer symbol.
// We have found a complete symbol, not part of a longer symbol.
result
.
replace
(
index
,
size
,
iter
->
second
);
result
.
replace
(
index
,
size
,
iter
->
second
);
index
+=
iter
->
second
.
size
();
index
+=
iter
->
second
.
size
();
}
}
...
@@ -797,7 +797,7 @@ private:
...
@@ -797,7 +797,7 @@ private:
void
OpenCLContext
::
findMoleculeGroups
()
{
void
OpenCLContext
::
findMoleculeGroups
()
{
// The first time this is called, we need to identify all the molecules in the system.
// The first time this is called, we need to identify all the molecules in the system.
if
(
moleculeGroups
.
size
()
==
0
)
{
if
(
moleculeGroups
.
size
()
==
0
)
{
// Add a ForceInfo that makes sure reordering doesn't break virtual sites.
// Add a ForceInfo that makes sure reordering doesn't break virtual sites.
...
@@ -879,7 +879,7 @@ void OpenCLContext::findMoleculeGroups() {
...
@@ -879,7 +879,7 @@ void OpenCLContext::findMoleculeGroups() {
if
(
!
forces
[
k
]
->
areParticlesIdentical
(
mol
.
atoms
[
i
],
mol2
.
atoms
[
i
]))
if
(
!
forces
[
k
]
->
areParticlesIdentical
(
mol
.
atoms
[
i
],
mol2
.
atoms
[
i
]))
identical
=
false
;
identical
=
false
;
}
}
// See if the constraints are identical.
// See if the constraints are identical.
for
(
int
i
=
0
;
i
<
(
int
)
mol
.
constraints
.
size
()
&&
identical
;
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
mol
.
constraints
.
size
()
&&
identical
;
i
++
)
{
...
@@ -960,11 +960,11 @@ void OpenCLContext::invalidateMolecules() {
...
@@ -960,11 +960,11 @@ void OpenCLContext::invalidateMolecules() {
}
}
if
(
valid
)
if
(
valid
)
return
;
return
;
// The list of which molecules are identical is no longer valid. We need to restore the
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
// again.
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldPosq
(
paddedNumAtoms
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
8a762772
...
@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
...
@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions
.
resize
(
context
.
getNumAtoms
());
atomExclusions
.
resize
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
atomExclusions
[
i
].
push_back
(
i
);
atomExclusions
[
i
].
push_back
(
i
);
...
@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
// Build a list of tiles that contain exclusions.
// Build a list of tiles that contain exclusions.
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
@@ -410,7 +410,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -410,7 +410,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
// We are using a cutoff, and the kernels have already been created.
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
...
@@ -491,7 +491,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -491,7 +491,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
groupSize
-=
32
;
if
(
groupSize
<
32
)
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
8a762772
...
@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
...
@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
...
@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
...
@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
...
@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
}
}
else {
else {
// This is an off-diagonal tile.
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
real4 tempPosq = posq[j];
...
@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
...
@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
PRUNE_BY_CUTOFF
#
ifdef
PRUNE_BY_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
...
@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
...
@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
}
}
else
else
skipTiles[get_local_id(0)] = end;
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
currentSkipIndex = tbx;
SYNC_WARPS;
SYNC_WARPS;
}
}
...
@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
...
@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
...
@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
8a762772
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
...
@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
...
@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[TILE_SIZE]
;
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
...
@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
while (pos < end) {
while (pos < end) {
const bool hasExclusions = false;
const bool hasExclusions = false;
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
unsigned int atom2 = j;
unsigned int atom2 = j;
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment