Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f4591cad
Commit
f4591cad
authored
May 23, 2013
by
Yutong Zhao
Browse files
bugfix to ifdef conditions in nonbonded.cu
parent
f2276667
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
27 deletions
+29
-27
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+13
-11
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+16
-16
No files found.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
f4591cad
...
...
@@ -415,11 +415,14 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
}
#include <map>
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
map
<
string
,
string
>
defines
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
{
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
...
...
@@ -462,7 +465,12 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
bool
useShuffle
=
(
defines
[
"ENABLE_SHUFFLE"
]
==
"1"
);
bool
useShuffle
;
if
(
defines
.
find
(
"ENABLE_SHUFFLE"
)
!=
defines
.
end
())
{
useShuffle
=
true
;
}
else
{
useShuffle
=
false
;
}
// Part 1. Defines for on diagonal exclusion tiles
stringstream
loadLocal1
;
...
...
@@ -510,11 +518,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
if
(
useShuffle
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
declareLocal2
<<
params
[
i
].
getType
()
<<
" shfl"
<<
params
[
i
].
getName
()
<<
";
\n
"
;
//if (params[i].getNumComponents() == 1) {
//declareLocal2<<params[i].getType()<<" "<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
//} else {
// declareLocal2<<params[i].getType()<<" temp"<<params[i].getName()<<";\n";
//}
}
}
else
{
// not used if using shared memory
...
...
@@ -576,8 +579,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
<<
"=real_shfl(shfl"
<<
params
[
i
].
getName
()
<<
", tgx+1);
\n
"
;
}
else
{
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
j
++
)
{
// looks something like
// shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
// looks something like shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
<<
"=real_shfl(shfl"
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
...
...
@@ -614,8 +616,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"LAST_EXCLUSION_TILE"
]
=
context
.
intToString
(
endExclusionIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
context
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
//
if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision())
//
defines["ENABLE_SHUFFLE"] = "1";
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
f4591cad
...
...
@@ -124,7 +124,7 @@ extern "C" __global__ void computeNonbonded(
// This tile is on the diagonal.
#ifdef ENABLE_SHUFFLE
real4
shflPosq
=
posq1
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
posq1
.
x
;
localData
[
threadIdx
.
x
].
y
=
posq1
.
y
;
localData
[
threadIdx
.
x
].
z
=
posq1
.
z
;
...
...
@@ -139,7 +139,7 @@ extern "C" __global__ void computeNonbonded(
real4
posq2
;
#ifdef ENABLE_SHUFFLE
BROADCAST_WARP_DATA
#el
if
#el
se
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
...
@@ -188,7 +188,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
=
0.0
f
;
shflForce
.
y
=
0.0
f
;
shflForce
.
z
=
0.0
f
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
shflPosq
.
x
;
localData
[
threadIdx
.
x
].
y
=
shflPosq
.
y
;
localData
[
threadIdx
.
x
].
z
=
shflPosq
.
z
;
...
...
@@ -207,7 +207,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
...
@@ -246,7 +246,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
...
@@ -259,7 +259,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
...
@@ -284,7 +284,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
#el
if
#el
se
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
...
...
@@ -383,7 +383,7 @@ extern "C" __global__ void computeNonbonded(
// Load position of atom j from from global memory
#ifdef ENABLE_SHUFFLE
shflPosq
=
posq
[
j
];
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
posq
[
j
].
x
;
localData
[
threadIdx
.
x
].
y
=
posq
[
j
].
y
;
localData
[
threadIdx
.
x
].
z
=
posq
[
j
].
z
;
...
...
@@ -406,7 +406,7 @@ extern "C" __global__ void computeNonbonded(
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
...
...
@@ -416,7 +416,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
...
@@ -448,7 +448,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
...
@@ -461,7 +461,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
...
@@ -483,7 +483,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
...
@@ -522,7 +522,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
...
@@ -535,7 +535,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
...
@@ -565,7 +565,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
#el
if
#el
se
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment