Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b0161939
Commit
b0161939
authored
Oct 05, 2012
by
Peter Eastman
Browse files
Eliminated mixed precision accumulation for nonbonded forces, which had negligible benefit
parent
c34bd3b2
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
51 deletions
+9
-51
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+2
-26
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+0
-12
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+3
-5
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+2
-4
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+2
-4
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
b0161939
...
...
@@ -2040,18 +2040,6 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
nb
.
getForceThreadBlockSize
());
map
<
string
,
string
>
replacements
;
stringstream
defineAccum
;
if
(
cu
.
getUseMixedPrecision
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double4 accum4;
\n
"
;
defines
[
"make_accum4"
]
=
"make_double4"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real4 accum4;
\n
"
;
defines
[
"make_accum4"
]
=
"make_real4"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
gbsaObc1
,
replacements
),
defines
);
computeBornSumKernel
=
cu
.
getKernel
(
module
,
"computeBornSum"
);
computeSumArgs
.
push_back
(
&
bornSum
->
getDevicePointer
());
...
...
@@ -2534,9 +2522,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs
<<
", unsigned long long* __restrict__ derivBuffers"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
index
=
cu
.
intToString
(
i
+
1
);
atomParams
<<
"
accum
deriv"
<<
index
<<
";
\n
"
;
atomParams
<<
"
real
deriv"
<<
index
<<
";
\n
"
;
clearLocal
<<
"localData[localAtomIndex].deriv"
<<
index
<<
" = 0;
\n
"
;
declare1
<<
"
accum
deriv"
<<
index
<<
"_1 = 0;
\n
"
;
declare1
<<
"
real
deriv"
<<
index
<<
"_1 = 0;
\n
"
;
load2
<<
"real deriv"
<<
index
<<
"_2 = 0;
\n
"
;
recordDeriv
<<
"localData[atom2].deriv"
<<
index
<<
" += deriv"
<<
index
<<
"_2;
\n
"
;
storeDerivs1
<<
"STORE_DERIVATIVE_1("
<<
index
<<
")
\n
"
;
...
...
@@ -2555,18 +2543,6 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements
[
"STORE_DERIVATIVES_1"
]
=
storeDerivs1
.
str
();
replacements
[
"STORE_DERIVATIVES_2"
]
=
storeDerivs2
.
str
();
map
<
string
,
string
>
defines
;
stringstream
defineAccum
;
if
(
cu
.
getUseMixedPrecision
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_double3"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_real3"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
b0161939
...
...
@@ -450,18 +450,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
stringstream
defineAccum
;
if
(
context
.
getUseMixedPrecision
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_double3"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_real3"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
b0161939
...
...
@@ -2,11 +2,9 @@
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0xFFFFFFFF)));
#define TILE_SIZE 32
DEFINE_ACCUM
typedef
struct
{
real4
posq
;
accum
3
force
;
real
3
force
;
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
float
padding
;
...
...
@@ -47,7 +45,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
accum
3
force
=
make_
accum
3
(
0
);
real
3
force
=
make_
real
3
(
0
);
DECLARE_ATOM1_DERIVATIVES
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
...
...
@@ -143,7 +141,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData
[
localAtomIndex
].
force
=
make_
accum
3
(
0
);
localData
[
localAtomIndex
].
force
=
make_
real
3
(
0
);
CLEAR_LOCAL_DERIVATIVES
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
b0161939
...
...
@@ -4,8 +4,6 @@
#define TILE_SIZE 32
#define WARPS_PER_GROUP (FORCE_WORK_GROUP_SIZE/TILE_SIZE)
DEFINE_ACCUM
/**
* Reduce the Born sums to compute the Born radii.
*/
...
...
@@ -333,7 +331,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
typedef
struct
{
real
x
,
y
,
z
;
real
q
;
accum
fx
,
fy
,
fz
,
fw
;
real
fx
,
fy
,
fz
,
fw
;
real
bornRadius
;
}
AtomData2
;
...
...
@@ -374,7 +372,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
accum
4
force
=
make_
accum
4
(
0
);
real
4
force
=
make_
real
4
(
0
);
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
b0161939
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
DEFINE_ACCUM
typedef
struct
{
real
x
,
y
,
z
;
real
q
;
accum
fx
,
fy
,
fz
;
real
fx
,
fy
,
fz
;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real
padding
;
...
...
@@ -49,7 +47,7 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
accum
3
force
=
make_
accum
3
(
0
);
real
3
force
=
make_
real
3
(
0
);
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment