Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
33df8402
Commit
33df8402
authored
May 24, 2013
by
peastman
Browse files
Fixed bug on pre-Kepler GPUs. Also simplified code a little bit.
parent
178aa003
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
50 deletions
+44
-50
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+4
-12
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+40
-38
No files found.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
33df8402
...
@@ -416,12 +416,6 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
...
@@ -416,12 +416,6 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
}
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
map
<
string
,
string
>
defines
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
{
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
@@ -463,12 +457,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -463,12 +457,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
bool
useShuffle
;
bool
useShuffle
=
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
());
if
(
defines
.
find
(
"ENABLE_SHUFFLE"
)
!=
defines
.
end
())
{
useShuffle
=
true
;
}
else
{
useShuffle
=
false
;
}
// Part 1. Defines for on diagonal exclusion tiles
// Part 1. Defines for on diagonal exclusion tiles
stringstream
loadLocal1
;
stringstream
loadLocal1
;
...
@@ -589,6 +578,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -589,6 +578,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements
[
"SHUFFLE_WARP_DATA"
]
=
shuffleWarpData
.
str
();
replacements
[
"SHUFFLE_WARP_DATA"
]
=
shuffleWarpData
.
str
();
map
<
string
,
string
>
defines
;
if
(
useCutoff
)
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
@@ -597,6 +587,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -597,6 +587,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useShuffle
)
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
33df8402
...
@@ -12,18 +12,20 @@ typedef struct {
...
@@ -12,18 +12,20 @@ typedef struct {
}
AtomData
;
}
AtomData
;
#endif
#endif
#ifdef ENABLE_SHUFFLE
//support for 64 bit shuffles
//support for 64 bit shuffles
static
__inline__
__device__
float
real_shfl
(
float
var
,
int
srcLane
)
{
static
__inline__
__device__
float
real_shfl
(
float
var
,
int
srcLane
)
{
return
__shfl
(
var
,
srcLane
);
return
__shfl
(
var
,
srcLane
);
}
}
static
__inline__
__device__
double
real_shfl
(
double
var
,
int
srcLane
)
{
static
__inline__
__device__
double
real_shfl
(
double
var
,
int
srcLane
)
{
int
hi
,
lo
;
int
hi
,
lo
;
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"d"
(
var
));
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"d"
(
var
));
hi
=
__shfl
(
hi
,
srcLane
);
hi
=
__shfl
(
hi
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
return
__hiloint2double
(
hi
,
lo
);
return
__hiloint2double
(
hi
,
lo
);
}
}
#endif
/**
/**
* Compute nonbonded interactions. The kernel is separated into two parts,
* Compute nonbonded interactions. The kernel is separated into two parts,
...
@@ -32,27 +34,27 @@ static __inline__ __device__ double real_shfl(double var, int srcLane) {
...
@@ -32,27 +34,27 @@ static __inline__ __device__ double real_shfl(double var, int srcLane) {
* each of warpsize. Each warp computes a range of tiles.
* each of warpsize. Each warp computes a range of tiles.
*
*
* Tiles with exclusions compute the entire set of interactions across
* Tiles with exclusions compute the entire set of interactions across
* atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
* atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
* the forces are computed and accumulated diagonally in the manner shown below
* the forces are computed and accumulated diagonally in the manner shown below
* where, suppose
* where, suppose
*
*
* [a-h] comprise atom block 1, [i-p] comprise atom block 2
* [a-h] comprise atom block 1, [i-p] comprise atom block 2
*
*
* 1 denotes the first set of calculations within the warp
* 1 denotes the first set of calculations within the warp
* 2 denotes the second set of calculations within the warp
* 2 denotes the second set of calculations within the warp
* ... etc.
* ... etc.
*
*
* threads
* threads
* 0 1 2 3 4 5 6 7
* 0 1 2 3 4 5 6 7
* atom1
* atom1
* L a b c d e f g h
* L a b c d e f g h
* o i 1 2 3 4 5 6 7 8
* o i 1 2 3 4 5 6 7 8
* c j 8 1 2 3 4 5 6 7
* c j 8 1 2 3 4 5 6 7
* a k 7 8 1 2 3 4 5 6
* a k 7 8 1 2 3 4 5 6
* l l 6 7 8 1 2 3 4 5
* l l 6 7 8 1 2 3 4 5
* D m 5 6 7 8 1 2 3 4
* D m 5 6 7 8 1 2 3 4
* a n 4 5 6 7 8 1 2 3
* a n 4 5 6 7 8 1 2 3
* t o 3 4 5 6 7 8 1 2
* t o 3 4 5 6 7 8 1 2
* a p 2 3 4 5 6 7 8 1
* a p 2 3 4 5 6 7 8 1
*
*
* Tiles without exclusions read off directly from the neighbourlist interactingAtoms
* Tiles without exclusions read off directly from the neighbourlist interactingAtoms
...
@@ -242,8 +244,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -242,8 +244,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
...
@@ -256,8 +258,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -256,8 +258,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
...
@@ -419,7 +421,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -419,7 +421,7 @@ extern "C" __global__ void computeNonbonded(
#else
#else
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
real4
posq2
=
make_real4
(
localData
[
atom2
].
x
,
localData
[
atom2
].
y
,
localData
[
atom2
].
z
,
localData
[
atom2
].
q
);
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -444,8 +446,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -444,8 +446,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
...
@@ -458,8 +460,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -458,8 +460,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
...
@@ -518,8 +520,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -518,8 +520,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
...
@@ -532,8 +534,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -532,8 +534,8 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment