Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f4591cad
"plugins/vscode:/vscode.git/clone" did not exist on "80d8311ecb148f019fd9eb8493ad4f03d8f8e5f6"
Commit
f4591cad
authored
May 23, 2013
by
Yutong Zhao
Browse files
bugfix to ifdef conditions in nonbonded.cu
parent
f2276667
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
27 deletions
+29
-27
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+13
-11
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+16
-16
No files found.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
f4591cad
...
@@ -415,11 +415,14 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
...
@@ -415,11 +415,14 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
}
}
#include <map>
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
{
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
...
@@ -462,7 +465,12 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -462,7 +465,12 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
bool
useShuffle
=
(
defines
[
"ENABLE_SHUFFLE"
]
==
"1"
);
bool
useShuffle
;
if
(
defines
.
find
(
"ENABLE_SHUFFLE"
)
!=
defines
.
end
())
{
useShuffle
=
true
;
}
else
{
useShuffle
=
false
;
}
// Part 1. Defines for on diagonal exclusion tiles
// Part 1. Defines for on diagonal exclusion tiles
stringstream
loadLocal1
;
stringstream
loadLocal1
;
...
@@ -510,11 +518,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -510,11 +518,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
if
(
useShuffle
)
{
if
(
useShuffle
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
declareLocal2
<<
params
[
i
].
getType
()
<<
" shfl"
<<
params
[
i
].
getName
()
<<
";
\n
"
;
declareLocal2
<<
params
[
i
].
getType
()
<<
" shfl"
<<
params
[
i
].
getName
()
<<
";
\n
"
;
//if (params[i].getNumComponents() == 1) {
//declareLocal2<<params[i].getType()<<" "<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
//} else {
// declareLocal2<<params[i].getType()<<" temp"<<params[i].getName()<<";\n";
//}
}
}
}
else
{
}
else
{
// not used if using shared memory
// not used if using shared memory
...
@@ -576,8 +579,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -576,8 +579,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
<<
"=real_shfl(shfl"
<<
params
[
i
].
getName
()
<<
", tgx+1);
\n
"
;
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
<<
"=real_shfl(shfl"
<<
params
[
i
].
getName
()
<<
", tgx+1);
\n
"
;
}
else
{
}
else
{
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
j
++
)
{
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
j
++
)
{
// looks something like
// looks something like shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
// shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
shuffleWarpData
<<
"shfl"
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
<<
"=real_shfl(shfl"
<<
"."
<<
suffixes
[
j
]
<<
"=real_shfl(shfl"
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
...
@@ -614,8 +616,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -614,8 +616,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"LAST_EXCLUSION_TILE"
]
=
context
.
intToString
(
endExclusionIndex
);
defines
[
"LAST_EXCLUSION_TILE"
]
=
context
.
intToString
(
endExclusionIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
context
.
getUseDoublePrecision
())
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
context
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
//
if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision())
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
//
defines["ENABLE_SHUFFLE"] = "1";
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
f4591cad
...
@@ -124,7 +124,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -124,7 +124,7 @@ extern "C" __global__ void computeNonbonded(
// This tile is on the diagonal.
// This tile is on the diagonal.
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
real4
shflPosq
=
posq1
;
real4
shflPosq
=
posq1
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
posq1
.
x
;
localData
[
threadIdx
.
x
].
x
=
posq1
.
x
;
localData
[
threadIdx
.
x
].
y
=
posq1
.
y
;
localData
[
threadIdx
.
x
].
y
=
posq1
.
y
;
localData
[
threadIdx
.
x
].
z
=
posq1
.
z
;
localData
[
threadIdx
.
x
].
z
=
posq1
.
z
;
...
@@ -139,7 +139,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -139,7 +139,7 @@ extern "C" __global__ void computeNonbonded(
real4
posq2
;
real4
posq2
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
BROADCAST_WARP_DATA
BROADCAST_WARP_DATA
#el
if
#el
se
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
@@ -188,7 +188,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -188,7 +188,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
=
0.0
f
;
shflForce
.
x
=
0.0
f
;
shflForce
.
y
=
0.0
f
;
shflForce
.
y
=
0.0
f
;
shflForce
.
z
=
0.0
f
;
shflForce
.
z
=
0.0
f
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
shflPosq
.
x
;
localData
[
threadIdx
.
x
].
x
=
shflPosq
.
x
;
localData
[
threadIdx
.
x
].
y
=
shflPosq
.
y
;
localData
[
threadIdx
.
x
].
y
=
shflPosq
.
y
;
localData
[
threadIdx
.
x
].
z
=
shflPosq
.
z
;
localData
[
threadIdx
.
x
].
z
=
shflPosq
.
z
;
...
@@ -207,7 +207,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -207,7 +207,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
@@ -246,7 +246,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -246,7 +246,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
@@ -259,7 +259,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -259,7 +259,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
@@ -284,7 +284,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -284,7 +284,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
#el
if
#el
se
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
...
@@ -383,7 +383,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -383,7 +383,7 @@ extern "C" __global__ void computeNonbonded(
// Load position of atom j from from global memory
// Load position of atom j from from global memory
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflPosq
=
posq
[
j
];
shflPosq
=
posq
[
j
];
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
=
posq
[
j
].
x
;
localData
[
threadIdx
.
x
].
x
=
posq
[
j
].
x
;
localData
[
threadIdx
.
x
].
y
=
posq
[
j
].
y
;
localData
[
threadIdx
.
x
].
y
=
posq
[
j
].
y
;
localData
[
threadIdx
.
x
].
z
=
posq
[
j
].
z
;
localData
[
threadIdx
.
x
].
z
=
posq
[
j
].
z
;
...
@@ -406,7 +406,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -406,7 +406,7 @@ extern "C" __global__ void computeNonbonded(
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#el
if
#el
se
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
...
@@ -416,7 +416,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -416,7 +416,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
@@ -448,7 +448,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -448,7 +448,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
@@ -461,7 +461,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -461,7 +461,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
@@ -483,7 +483,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -483,7 +483,7 @@ extern "C" __global__ void computeNonbonded(
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
real4
posq2
=
shflPosq
;
real4
posq2
=
shflPosq
;
#el
if
#el
se
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
...
@@ -522,7 +522,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -522,7 +522,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
...
@@ -535,7 +535,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -535,7 +535,7 @@ extern "C" __global__ void computeNonbonded(
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#el
if
#el
se
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
@@ -565,7 +565,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -565,7 +565,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
z
*
0x100000000
)));
#el
if
#el
se
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment