Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b1d621b3
Commit
b1d621b3
authored
Jul 01, 2013
by
Yutong Zhao
Browse files
Merge pull request #32 from peastman/pme
Created CPU implementation of PME
parents
cce1ca81
794ab9a7
Changes
30
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
678 additions
and
218 deletions
+678
-218
CMakeLists.txt
CMakeLists.txt
+14
-0
cmake_modules/FindFFTW.cmake
cmake_modules/FindFFTW.cmake
+25
-0
olla/include/openmm/kernels.h
olla/include/openmm/kernels.h
+64
-0
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+11
-3
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+12
-0
platforms/cuda/src/CudaContext.h
platforms/cuda/src/CudaContext.h
+60
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+183
-108
platforms/cuda/src/CudaKernels.h
platforms/cuda/src/CudaKernels.h
+6
-1
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+14
-3
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+13
-3
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+10
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+12
-0
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+60
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+165
-90
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+6
-1
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+15
-2
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+5
-0
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
No files found.
CMakeLists.txt
View file @
b1d621b3
...
...
@@ -396,6 +396,20 @@ IF(OPENMM_BUILD_DRUDE_PLUGIN)
ADD_SUBDIRECTORY
(
plugins/drude
)
ENDIF
(
OPENMM_BUILD_DRUDE_PLUGIN
)
# CPU PME plugin
FIND_PACKAGE
(
FFTW QUIET
)
IF
(
FFTW_FOUND
)
SET
(
OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL
"Build CPU PME plugin"
)
ELSE
(
FFTW_FOUND
)
SET
(
OPENMM_BUILD_PME_PLUGIN OFF CACHE BOOL
"Build CPU PME plugin"
)
ENDIF
(
FFTW_FOUND
)
SET
(
OPENMM_BUILD_PME_PATH
)
IF
(
OPENMM_BUILD_PME_PLUGIN
)
SET
(
OPENMM_BUILD_PME_PATH
${
CMAKE_CURRENT_SOURCE_DIR
}
/plugins/cpupme
)
ADD_SUBDIRECTORY
(
plugins/cpupme
)
ENDIF
(
OPENMM_BUILD_PME_PLUGIN
)
INSTALL_TARGETS
(
/lib RUNTIME_DIRECTORY /lib
${
SHARED_TARGET
}
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
INSTALL_TARGETS
(
/lib RUNTIME_DIRECTORY /lib
${
STATIC_TARGET
}
)
...
...
cmake_modules/FindFFTW.cmake
0 → 100644
View file @
b1d621b3
# - Find FFTW
# Find the native FFTW includes and library
#
# FFTW_INCLUDES - where to find fftw3.h
# FFTW_LIBRARY - the main FFTW library.
# FFTW_THREADS_LIBRARY - the FFTW multithreading support library.
# FFTW_FOUND - True if FFTW found.
if
(
FFTW_INCLUDES
)
# Already in cache, be silent
set
(
FFTW_FIND_QUIETLY TRUE
)
endif
(
FFTW_INCLUDES
)
find_path
(
FFTW_INCLUDES fftw3.h
)
find_library
(
FFTW_LIBRARY NAMES fftw3f
)
find_library
(
FFTW_THREADS_LIBRARY NAMES fftw3f_threads
)
# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
# all listed variables are TRUE
include
(
FindPackageHandleStandardArgs
)
find_package_handle_standard_args
(
FFTW DEFAULT_MSG FFTW_LIBRARY FFTW_INCLUDES
)
find_package_handle_standard_args
(
FFTW_THREADS DEFAULT_MSG FFTW_THREADS_LIBRARY FFTW_INCLUDES
)
mark_as_advanced
(
FFTW_LIBRARY FFTW_THREADS_LIBRARY FFTW_INCLUDES
)
olla/include/openmm/kernels.h
View file @
b1d621b3
...
...
@@ -1152,6 +1152,70 @@ public:
virtual
void
execute
(
ContextImpl
&
context
)
=
0
;
};
/**
* This kernel performs the reciprocal space calculation for PME. In most cases, this
* calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
* In some cases it may want to outsource the work to a different kernel. In particular,
* GPU based platforms sometimes use a CPU based implementation provided by a separate
* plugin.
*/
class
CalcPmeReciprocalForceKernel
:
public
KernelImpl
{
public:
class
IO
;
static
std
::
string
Name
()
{
return
"CalcPmeReciprocalForce"
;
}
CalcPmeReciprocalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
)
:
KernelImpl
(
name
,
platform
)
{
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
virtual
void
initialize
(
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
double
alpha
)
=
0
;
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
virtual
void
beginComputation
(
IO
&
io
,
Vec3
periodicBoxSize
,
bool
includeEnergy
)
=
0
;
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
virtual
double
finishComputation
(
IO
&
io
)
=
0
;
};
/**
* Any class that uses CalcPmeReciprocalForceKernel should create an implementation of this
* class, then pass it to the kernel to manage communication with it.
*/
class
CalcPmeReciprocalForceKernel
::
IO
{
public:
/**
* Get a pointer to the atom charges and positions. This array should contain four
* elements for each atom: x, y, z, and q in that order.
*/
virtual
float
*
getPosq
()
=
0
;
/**
* Record the forces calculated by the kernel.
*
* @param force an array containing four elements for each atom. The first three
* are the x, y, and z components of the force, while the fourth element
* should be ignored.
*/
virtual
void
setForce
(
float
*
force
)
=
0
;
};
}
// namespace OpenMM
#endif
/*OPENMM_KERNELS_H_*/
platforms/cuda/include/CudaPlatform.h
View file @
b1d621b3
...
...
@@ -81,6 +81,13 @@ public:
static
const
std
::
string
key
=
"CudaPrecision"
;
return
key
;
}
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
static
const
std
::
string
&
CudaUseCpuPme
()
{
static
const
std
::
string
key
=
"CudaUseCpuPme"
;
return
key
;
}
/**
* This is the name of the parameter for specifying the path to the CUDA compiler.
*/
...
...
@@ -99,14 +106,15 @@ public:
class
OPENMM_EXPORT_CUDA
CudaPlatform
::
PlatformData
{
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
);
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
ContextImpl
*
context
;
std
::
vector
<
CudaContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
removeCM
,
peerAccessSupported
;
bool
removeCM
,
peerAccessSupported
,
useCpuPme
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
b1d621b3
...
...
@@ -231,6 +231,10 @@ CudaContext::~CudaContext() {
delete
forces
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
delete
reorderListeners
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
preComputations
.
size
();
i
++
)
delete
preComputations
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
postComputations
.
size
();
i
++
)
delete
postComputations
[
i
];
if
(
pinnedBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedBuffer
);
if
(
posq
!=
NULL
)
...
...
@@ -1102,6 +1106,14 @@ void CudaContext::addReorderListener(ReorderListener* listener) {
reorderListeners
.
push_back
(
listener
);
}
void
CudaContext
::
addPreComputation
(
ForcePreComputation
*
computation
)
{
preComputations
.
push_back
(
computation
);
}
void
CudaContext
::
addPostComputation
(
ForcePostComputation
*
computation
)
{
postComputations
.
push_back
(
computation
);
}
struct
CudaContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/cuda/src/CudaContext.h
View file @
b1d621b3
...
...
@@ -70,6 +70,8 @@ public:
class
WorkTask
;
class
WorkThread
;
class
ReorderListener
;
class
ForcePreComputation
;
class
ForcePostComputation
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
std
::
string
&
precision
,
...
...
@@ -454,6 +456,28 @@ public:
std
::
vector
<
ReorderListener
*>&
getReorderListeners
()
{
return
reorderListeners
;
}
/**
* Add a pre-computation that should be called at the very start of force and energy evaluations.
* The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPreComputation
(
ForcePreComputation
*
computation
);
/**
* Get the list of ForcePreComputations.
*/
std
::
vector
<
ForcePreComputation
*>&
getPreComputations
()
{
return
preComputations
;
}
/**
* Add a post-computation that should be called at the very end of force and energy evaluations.
* The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPostComputation
(
ForcePostComputation
*
computation
);
/**
* Get the list of ForcePostComputations.
*/
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
return
postComputations
;
}
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
...
...
@@ -519,6 +543,8 @@ private:
std
::
vector
<
CUdeviceptr
>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ForcePreComputation
*>
preComputations
;
std
::
vector
<
ForcePostComputation
*>
postComputations
;
CudaIntegrationUtilities
*
integration
;
CudaExpressionUtilities
*
expression
;
CudaBondedUtilities
*
bonded
;
...
...
@@ -580,7 +606,7 @@ private:
/**
* This abstract class defines a function to be executed whenever atoms get reordered.
* Objects that need to know when reordering happens should create a
r
eorderListener
* Objects that need to know when reordering happens should create a
R
eorderListener
* and register it by calling addReorderListener().
*/
class
CudaContext
::
ReorderListener
{
...
...
@@ -590,6 +616,39 @@ public:
}
};
/**
* This abstract class defines a function to be executed at the very beginning of force and
* energy evaluation, before any other calculation has been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePreComputation, register it by calling addForcePreComputation().
*/
class
CudaContext
::
ForcePreComputation
{
public:
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
virtual
void
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
=
0
;
};
/**
* This abstract class defines a function to be executed at the very end of force and
* energy evaluation, after all other calculations have been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePostComputation, register it by calling addForcePostComputation().
*/
class
CudaContext
::
ForcePostComputation
{
public:
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy.
*/
virtual
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
=
0
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDACONTEXT_H_*/
platforms/cuda/src/CudaKernels.cpp
View file @
b1d621b3
...
...
@@ -84,10 +84,12 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
void
CudaCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cu
.
setAsCurrent
();
cu
.
clearAutoclearBuffers
();
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cu
.
setComputeForceCount
(
cu
.
getComputeForceCount
()
+
1
);
cu
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
nb
.
prepareInteractions
();
}
...
...
@@ -96,8 +98,10 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
if
((
groups
&
(
1
<<
cu
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cu
.
getNonbondedUtilities
().
computeInteractions
();
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
double
sum
=
0.0
;
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
if
(
includeEnergy
)
{
CudaArray
&
energyArray
=
cu
.
getEnergyBuffer
();
if
(
cu
.
getUseDoublePrecision
())
{
...
...
@@ -1330,6 +1334,58 @@ private:
const
NonbondedForce
&
force
;
};
class
CudaCalcNonbondedForceKernel
::
PmeIO
:
public
CalcPmeReciprocalForceKernel
::
IO
{
public:
PmeIO
(
CudaContext
&
cu
,
CUfunction
addForcesKernel
)
:
cu
(
cu
),
addForcesKernel
(
addForcesKernel
),
forceTemp
(
NULL
)
{
forceTemp
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"PmeForce"
);
}
~
PmeIO
()
{
if
(
forceTemp
!=
NULL
)
delete
forceTemp
;
}
float
*
getPosq
()
{
cu
.
setAsCurrent
();
cu
.
getPosq
().
download
(
posq
);
return
(
float
*
)
&
posq
[
0
];
}
void
setForce
(
float
*
force
)
{
forceTemp
->
upload
(
force
);
void
*
args
[]
=
{
&
forceTemp
->
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
()};
cu
.
executeKernel
(
addForcesKernel
,
args
,
cu
.
getNumAtoms
());
}
private:
CudaContext
&
cu
;
vector
<
float4
>
posq
;
CudaArray
*
forceTemp
;
CUfunction
addForcesKernel
;
};
class
CudaCalcNonbondedForceKernel
::
PmePreComputation
:
public
CudaContext
::
ForcePreComputation
{
public:
PmePreComputation
(
CudaContext
&
cu
,
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
cu
(
cu
),
pme
(
pme
),
io
(
io
)
{
}
void
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
Vec3
boxSize
(
cu
.
getPeriodicBoxSize
().
x
,
cu
.
getPeriodicBoxSize
().
y
,
cu
.
getPeriodicBoxSize
().
z
);
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
beginComputation
(
io
,
boxSize
,
includeEnergy
);
}
private:
CudaContext
&
cu
;
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
class
CudaCalcNonbondedForceKernel
::
PmePostComputation
:
public
CudaContext
::
ForcePostComputation
{
public:
PmePostComputation
(
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
pme
(
pme
),
io
(
io
)
{
}
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
return
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
private:
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
CudaCalcNonbondedForceKernel
::~
CudaCalcNonbondedForceKernel
()
{
cu
.
setAsCurrent
();
if
(
sigmaEpsilon
!=
NULL
)
...
...
@@ -1354,6 +1410,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete
pmeAtomGridIndex
;
if
(
sort
!=
NULL
)
delete
sort
;
if
(
pmeio
!=
NULL
)
delete
pmeio
;
if
(
hasInitializedFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
...
...
@@ -1457,7 +1515,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
else
dispersionCoefficient
=
0.0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cu
.
getContextIndex
()
==
0
)
{
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
...
...
@@ -1465,7 +1523,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
...
...
@@ -1484,7 +1542,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double2
)
:
sizeof
(
float2
));
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cu
.
getContextIndex
()
==
0
)
{
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
...
@@ -1497,7 +1555,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"PME_ORDER"
]
=
cu
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
pmeDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -1510,6 +1568,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if
(
cu
.
getUseDoublePrecision
())
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
CUfunction
addForcesKernel
=
cu
.
getKernel
(
module
,
"addForces"
);
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
}
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
}
if
(
pmeio
==
NULL
)
{
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
...
...
@@ -1618,6 +1692,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
}
}
else
ewaldSelfEnergy
=
0.0
;
...
...
@@ -1654,13 +1729,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
double
CudaCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
void
*
sumsArgs
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldSumsKernel
,
sumsArgs
,
cosSinSums
->
getSize
());
void
*
forcesArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
if
(
directPmeGrid
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/CudaKernels.h
View file @
b1d621b3
...
...
@@ -557,7 +557,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
)
{
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
)
,
pmeio
(
NULL
)
{
}
~
CudaCalcNonbondedForceKernel
();
/**
...
...
@@ -596,6 +596,9 @@ private:
const
char
*
getMaxValue
()
const
{
return
"make_int2(INT_MAX, INT_MAX)"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
class
PmeIO
;
class
PmePreComputation
;
class
PmePostComputation
;
CudaContext
&
cu
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
...
...
@@ -609,6 +612,8 @@ private:
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
CUfunction
ewaldSumsKernel
;
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
b1d621b3
...
...
@@ -87,12 +87,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaDeviceName
());
platformProperties
.
push_back
(
CudaUseBlockingSync
());
platformProperties
.
push_back
(
CudaPrecision
());
platformProperties
.
push_back
(
CudaUseCpuPme
());
platformProperties
.
push_back
(
CudaCompiler
());
platformProperties
.
push_back
(
CudaTempDirectory
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
...
@@ -141,13 +143,20 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaUseBlockingSync
())
:
properties
.
find
(
CudaUseBlockingSync
())
->
second
);
string
precisionPropValue
=
(
properties
.
find
(
CudaPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaPrecision
())
:
properties
.
find
(
CudaPrecision
())
->
second
);
string
cpuPmePropValue
=
(
properties
.
find
(
CudaUseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaUseCpuPme
())
:
properties
.
find
(
CudaUseCpuPme
())
->
second
);
const
string
&
compilerPropValue
=
(
properties
.
find
(
CudaCompiler
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaCompiler
())
:
properties
.
find
(
CudaCompiler
())
->
second
);
const
string
&
tempPropValue
=
(
properties
.
find
(
CudaTempDirectory
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaTempDirectory
())
:
properties
.
find
(
CudaTempDirectory
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
compilerPropValue
,
tempPropValue
));
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -155,8 +164,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete
data
;
}
CudaPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
...
...
@@ -185,10 +194,12 @@ CudaPlatform::PlatformData::PlatformData(const System& system, const string& dev
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaPrecision
()]
=
precisionProperty
;
propertyValues
[
CudaPlatform
::
CudaUseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaCompiler
()]
=
compilerProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
...
...
platforms/cuda/src/kernels/pme.cu
View file @
b1d621b3
...
...
@@ -271,3 +271,13 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
}
}
extern
"C"
__global__
void
addForces
(
const
real4
*
__restrict__
forces
,
unsigned
long
long
*
__restrict__
forceBuffers
)
{
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
f
=
forces
[
atom
];
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
x
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
y
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
}
}
platforms/cuda/tests/TestCudaRandom.cpp
View file @
b1d621b3
...
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()));
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
b1d621b3
...
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()));
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
...
...
platforms/opencl/include/OpenCLPlatform.h
View file @
b1d621b3
...
...
@@ -87,17 +87,25 @@ public:
static
const
std
::
string
key
=
"OpenCLPrecision"
;
return
key
;
}
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
static
const
std
::
string
&
OpenCLUseCpuPme
()
{
static
const
std
::
string
key
=
"OpenCLUseCpuPme"
;
return
key
;
}
};
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
);
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
ContextImpl
*
context
;
std
::
vector
<
OpenCLContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
removeCM
;
bool
removeCM
,
useCpuPme
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
b1d621b3
...
...
@@ -334,6 +334,10 @@ OpenCLContext::~OpenCLContext() {
delete
forces
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
delete
reorderListeners
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
preComputations
.
size
();
i
++
)
delete
preComputations
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
postComputations
.
size
();
i
++
)
delete
postComputations
[
i
];
if
(
pinnedBuffer
!=
NULL
)
delete
pinnedBuffer
;
if
(
posq
!=
NULL
)
...
...
@@ -1106,6 +1110,14 @@ void OpenCLContext::addReorderListener(ReorderListener* listener) {
reorderListeners
.
push_back
(
listener
);
}
void
OpenCLContext
::
addPreComputation
(
ForcePreComputation
*
computation
)
{
preComputations
.
push_back
(
computation
);
}
void
OpenCLContext
::
addPostComputation
(
ForcePostComputation
*
computation
)
{
postComputations
.
push_back
(
computation
);
}
struct
OpenCLContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
OpenCLContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/opencl/src/OpenCLContext.h
View file @
b1d621b3
...
...
@@ -158,6 +158,8 @@ public:
class
WorkTask
;
class
WorkThread
;
class
ReorderListener
;
class
ForcePreComputation
;
class
ForcePostComputation
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
std
::
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
);
...
...
@@ -554,6 +556,28 @@ public:
std
::
vector
<
ReorderListener
*>&
getReorderListeners
()
{
return
reorderListeners
;
}
/**
* Add a pre-computation that should be called at the very start of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPreComputation
(
ForcePreComputation
*
computation
);
/**
* Get the list of ForcePreComputations.
*/
std
::
vector
<
ForcePreComputation
*>&
getPreComputations
()
{
return
preComputations
;
}
/**
* Add a post-computation that should be called at the very end of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPostComputation
(
ForcePostComputation
*
computation
);
/**
* Get the list of ForcePostComputations.
*/
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
return
postComputations
;
}
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
...
...
@@ -625,6 +649,8 @@ private:
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ForcePreComputation
*>
preComputations
;
std
::
vector
<
ForcePostComputation
*>
postComputations
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
...
...
@@ -686,7 +712,7 @@ private:
/**
* This abstract class defines a function to be executed whenever atoms get reordered.
* Objects that need to know when reordering happens should create a
r
eorderListener
* Objects that need to know when reordering happens should create a
R
eorderListener
* and register it by calling addReorderListener().
*/
class
OpenCLContext
::
ReorderListener
{
...
...
@@ -696,6 +722,39 @@ public:
}
};
/**
* This abstract class defines a function to be executed at the very beginning of force and
* energy evaluation, before any other calculation has been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePreComputation, register it by calling addForcePreComputation().
*/
class
OpenCLContext
::
ForcePreComputation
{
public:
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
virtual
void
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
=
0
;
};
/**
* This abstract class defines a function to be executed at the very end of force and
* energy evaluation, after all other calculations have been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePostComputation, register it by calling addForcePostComputation().
*/
class
OpenCLContext
::
ForcePostComputation
{
public:
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy.
*/
virtual
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
=
0
;
};
}
// namespace OpenMM
#endif
/*OPENMM_OPENCLCONTEXT_H_*/
platforms/opencl/src/OpenCLKernels.cpp
View file @
b1d621b3
...
...
@@ -104,10 +104,12 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void
OpenCLCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cl
.
clearAutoclearBuffers
();
for
(
vector
<
OpenCLContext
::
ForcePreComputation
*>::
iterator
iter
=
cl
.
getPreComputations
().
begin
();
iter
!=
cl
.
getPreComputations
().
end
();
++
iter
)
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
nb
.
prepareInteractions
();
}
...
...
@@ -117,8 +119,10 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
if
((
groups
&
(
1
<<
cl
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cl
.
getNonbondedUtilities
().
computeInteractions
();
cl
.
reduceForces
();
double
sum
=
0.0
;
for
(
vector
<
OpenCLContext
::
ForcePostComputation
*>::
iterator
iter
=
cl
.
getPostComputations
().
begin
();
iter
!=
cl
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
cl
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
double
sum
=
0.0
f
;
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
...
...
@@ -1323,6 +1327,58 @@ private:
const
NonbondedForce
&
force
;
};
class
OpenCLCalcNonbondedForceKernel
::
PmeIO
:
public
CalcPmeReciprocalForceKernel
::
IO
{
public:
PmeIO
(
OpenCLContext
&
cl
,
cl
::
Kernel
addForcesKernel
)
:
cl
(
cl
),
addForcesKernel
(
addForcesKernel
),
forceTemp
(
NULL
)
{
forceTemp
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
cl
.
getNumAtoms
(),
"PmeForce"
);
addForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
forceTemp
->
getDeviceBuffer
());
}
~
PmeIO
()
{
if
(
forceTemp
!=
NULL
)
delete
forceTemp
;
}
float
*
getPosq
()
{
cl
.
getPosq
().
download
(
posq
);
return
(
float
*
)
&
posq
[
0
];
}
void
setForce
(
float
*
force
)
{
forceTemp
->
upload
(
force
);
addForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForce
().
getDeviceBuffer
());
cl
.
executeKernel
(
addForcesKernel
,
cl
.
getNumAtoms
());
}
private:
OpenCLContext
&
cl
;
vector
<
mm_float4
>
posq
;
OpenCLArray
*
forceTemp
;
cl
::
Kernel
addForcesKernel
;
};
class
OpenCLCalcNonbondedForceKernel
::
PmePreComputation
:
public
OpenCLContext
::
ForcePreComputation
{
public:
PmePreComputation
(
OpenCLContext
&
cl
,
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
cl
(
cl
),
pme
(
pme
),
io
(
io
)
{
}
void
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
Vec3
boxSize
(
cl
.
getPeriodicBoxSize
().
x
,
cl
.
getPeriodicBoxSize
().
y
,
cl
.
getPeriodicBoxSize
().
z
);
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
beginComputation
(
io
,
boxSize
,
includeEnergy
);
}
private:
OpenCLContext
&
cl
;
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
class
OpenCLCalcNonbondedForceKernel
::
PmePostComputation
:
public
OpenCLContext
::
ForcePostComputation
{
public:
PmePostComputation
(
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
pme
(
pme
),
io
(
io
)
{
}
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
return
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
private:
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
OpenCLCalcNonbondedForceKernel
::~
OpenCLCalcNonbondedForceKernel
()
{
if
(
sigmaEpsilon
!=
NULL
)
delete
sigmaEpsilon
;
...
...
@@ -1350,6 +1406,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete
sort
;
if
(
fft
!=
NULL
)
delete
fft
;
if
(
pmeio
!=
NULL
)
delete
pmeio
;
}
void
OpenCLCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -1430,7 +1488,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
dispersionCoefficient
=
0.0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cl
.
getContextIndex
()
==
0
)
{
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
...
...
@@ -1438,7 +1496,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
...
...
@@ -1454,7 +1512,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cl
.
getContextIndex
()
==
0
)
{
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
...
@@ -1465,7 +1523,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
...
...
@@ -1476,7 +1534,23 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
if
(
deviceIsCpu
)
pmeDefines
[
"DEVICE_IS_CPU"
]
=
"1"
;
if
(
cl
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cl
.
getPlatformData
().
context
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
cl
::
Kernel
addForcesKernel
=
cl
::
Kernel
(
program
,
"addForces"
);
pmeio
=
new
PmeIO
(
cl
,
addForcesKernel
);
cl
.
addPreComputation
(
new
PmePreComputation
(
cl
,
cpuPme
,
*
pmeio
));
cl
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
}
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
}
if
(
pmeio
==
NULL
)
{
// Create required data structures.
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
...
...
@@ -1565,6 +1639,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
}
}
else
ewaldSelfEnergy
=
0.0
;
...
...
@@ -1650,7 +1725,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
}
if
(
cosSinSums
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
recipBoxSize
=
mm_double4
(
2
*
M_PI
/
boxSize
.
x
,
2
*
M_PI
/
boxSize
.
y
,
2
*
M_PI
/
boxSize
.
z
,
0.0
);
double
recipCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
...
...
@@ -1669,7 +1744,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
if
(
pmeGrid
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
b1d621b3
...
...
@@ -557,7 +557,7 @@ public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
{
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
,
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -596,6 +596,9 @@ private:
const
char
*
getMaxValue
()
const
{
return
"(int2) (INT_MAX, INT_MAX)"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
class
PmeIO
;
class
PmePreComputation
;
class
PmePostComputation
;
OpenCLContext
&
cl
;
bool
hasInitializedKernel
;
OpenCLArray
*
sigmaEpsilon
;
...
...
@@ -611,6 +614,8 @@ private:
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLSort
*
sort
;
OpenCLFFT3D
*
fft
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
b1d621b3
...
...
@@ -31,6 +31,7 @@
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include <algorithm>
#include <sstream>
using
namespace
OpenMM
;
...
...
@@ -78,11 +79,13 @@ OpenCLPlatform::OpenCLPlatform() {
platformProperties
.
push_back
(
OpenCLPlatformIndex
());
platformProperties
.
push_back
(
OpenCLPlatformName
());
platformProperties
.
push_back
(
OpenCLPrecision
());
platformProperties
.
push_back
(
OpenCLUseCpuPme
());
setPropertyDefaultValue
(
OpenCLDeviceIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLDeviceName
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformName
(),
""
);
setPropertyDefaultValue
(
OpenCLPrecision
(),
"single"
);
setPropertyDefaultValue
(
OpenCLUseCpuPme
(),
"false"
);
}
double
OpenCLPlatform
::
getSpeed
()
const
{
...
...
@@ -112,7 +115,15 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
getPropertyDefaultValue
(
OpenCLDeviceIndex
())
:
properties
.
find
(
OpenCLDeviceIndex
())
->
second
);
string
precisionPropValue
=
(
properties
.
find
(
OpenCLPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLPrecision
())
:
properties
.
find
(
OpenCLPrecision
())
->
second
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
));
string
cpuPmePropValue
=
(
properties
.
find
(
OpenCLUseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLUseCpuPme
())
:
properties
.
find
(
OpenCLUseCpuPme
())
->
second
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
));
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -121,7 +132,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
int
platformIndex
=
0
;
if
(
platformPropValue
.
length
()
>
0
)
stringstream
(
platformPropValue
)
>>
platformIndex
;
...
...
@@ -150,6 +161,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
deviceIndex
<<
contexts
[
i
]
->
getDeviceIndex
();
deviceName
<<
contexts
[
i
]
->
getDevice
().
getInfo
<
CL_DEVICE_NAME
>
();
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
contexts
[
0
]
->
intToString
(
platformIndex
);
...
...
@@ -157,6 +169,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
cl
::
Platform
::
get
(
&
platforms
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformName
()]
=
platforms
[
platformIndex
].
getInfo
<
CL_PLATFORM_NAME
>
();
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
propertyValues
[
OpenCLPlatform
::
OpenCLUseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
}
...
...
platforms/opencl/src/kernels/pme.cl
View file @
b1d621b3
...
...
@@ -391,3 +391,8 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
forceBuffers[atom]
=
totalForce
;
}
}
__kernel
void
addForces
(
__global
const
real4*
restrict
forces,
__global
real4*
restrict
forceBuffers
)
{
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0))
forceBuffers[atom]
+=
forces[atom]
;
}
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
b1d621b3
...
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
()
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
));
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
,
"false"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment