Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cddeb60f
Commit
cddeb60f
authored
Mar 28, 2016
by
Peter Eastman
Browse files
Added property to request deterministic forces
parent
375a7081
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
71 additions
and
10 deletions
+71
-10
docs-source/usersguide/library.rst
docs-source/usersguide/library.rst
+9
-1
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+9
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+3
-1
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+11
-2
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+1
-1
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+35
-0
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
No files found.
docs-source/usersguide/library.rst
View file @
cddeb60f
...
...
@@ -1925,7 +1925,15 @@ The CUDA Platform recognizes the following Platform-specific properties:
computation, allowing the CPU to do other work. If it is set to “false”, CUDA
will spin-lock while the GPU is working. Setting it to "false" can improve performance slightly,
but also prevents the CPU from doing anything else while the GPU is working.
* CudaDeterministicForces: In some cases, the CUDA platform may compute forces
in ways that are not fully deterministic (typically differing in what order a
set of numbers get added together). This means that if you compute the forces
twice for the same particle positions, there may be tiny differences in the
results. In most cases this is not a problem, but certain algorithms depend
on forces being exactly reproducible to the last bit. If you set this
property to "true", it will instead do these calculations in a way that
produces fully deterministic results, at the cost of a small decrease in
performance.
The CUDA Platform also supports parallelizing a simulation across multiple GPUs.
To do that, set the CudaDeviceIndex property to a comma separated list of
...
...
platforms/cuda/include/CudaPlatform.h
View file @
cddeb60f
...
...
@@ -117,20 +117,27 @@ public:
static
const
std
::
string
key
=
"CudaDisablePmeStream"
;
return
key
;
}
/**
* This is the name of the parameter for requesting that force computations be fully deterministic.
*/
static
const
std
::
string
&
CudaDeterministicForces
()
{
static
const
std
::
string
key
=
"CudaDeterministicForces"
;
return
key
;
}
};
class
OPENMM_EXPORT_CUDA
CudaPlatform
::
PlatformData
{
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
const
std
::
string
&
pmeStreamProperty
,
const
std
::
string
&
deterministicForcesProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
ContextImpl
*
context
;
std
::
vector
<
CudaContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
hasInitializedContexts
,
removeCM
,
peerAccessSupported
,
useCpuPme
,
disablePmeStream
;
bool
hasInitializedContexts
,
removeCM
,
peerAccessSupported
,
useCpuPme
,
disablePmeStream
,
deterministicForces
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
cddeb60f
...
...
@@ -1693,6 +1693,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
if
(
usePmeStream
)
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
if
(
cu
.
getPlatformData
().
deterministicForces
)
pmeDefines
[
"USE_DETERMINISTIC_FORCES"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
...
...
@@ -1920,7 +1922,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeSpreadChargeKernel
,
spreadArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
)
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
||
cu
.
getPlatformData
().
deterministicForces
)
{
void
*
finishSpreadArgs
[]
=
{
&
directPmeGrid
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
directPmeGrid
->
getSize
());
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
cddeb60f
...
...
@@ -102,12 +102,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDeterministicForces
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDeterministicForces
(),
"false"
);
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
...
@@ -168,10 +170,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
string
deterministicForcesValue
=
(
properties
.
find
(
CudaDeterministicForces
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDeterministicForces
())
:
properties
.
find
(
CudaDeterministicForces
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
deterministicForcesValue
.
begin
(),
deterministicForcesValue
.
end
(),
deterministicForcesValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
...
...
@@ -180,7 +185,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -189,7 +195,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
deterministicForcesProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
...
...
@@ -230,6 +237,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
...
...
@@ -239,6 +247,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDeterministicForces
()]
=
deterministicForces
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
...
...
platforms/cuda/src/kernels/pme.cu
View file @
cddeb60f
...
...
@@ -83,7 +83,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200
|| defined(USE_DETERMINISTIC_FORCES)
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
int
gridIndex
=
index
;
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
...
...
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
cddeb60f
...
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
cddeb60f
...
...
@@ -118,9 +118,44 @@ void testReordering() {
}
}
void
testDeterministicForces
()
{
// Check that the CudaDeterministicForces property works correctly.
const
int
numParticles
=
1000
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
1
,
0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
6
);
}
VerletIntegrator
integrator
(
0.001
);
map
<
string
,
string
>
properties
;
properties
[
CudaPlatform
::
CudaDeterministicForces
()]
=
"true"
;
Context
context
(
system
,
integrator
,
platform
,
properties
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
State
state2
=
context
.
getState
(
State
::
Forces
);
// All forces should be *exactly* equal.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
0
],
state2
.
getForces
()[
i
][
0
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
1
],
state2
.
getForces
()[
i
][
1
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
2
],
state2
.
getForces
()[
i
][
2
]);
}
}
void
runPlatformTests
()
{
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
PME
);
testReordering
();
testDeterministicForces
();
}
platforms/cuda/tests/TestCudaRandom.cpp
View file @
cddeb60f
...
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
cddeb60f
...
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment