Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f2548616
Commit
f2548616
authored
Jun 27, 2013
by
peastman
Browse files
OpenCL platform can use the CPU based PME
parent
3628d973
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
290 additions
and
127 deletions
+290
-127
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+2
-2
platforms/cuda/src/CudaContext.h
platforms/cuda/src/CudaContext.h
+2
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+4
-3
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+6
-6
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+10
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+12
-0
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+59
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+165
-90
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+6
-1
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+15
-2
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+5
-0
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+1
-1
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+1
-1
plugins/cpupme/src/CpuPmeKernelFactory.cpp
plugins/cpupme/src/CpuPmeKernelFactory.cpp
+1
-1
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+0
-14
No files found.
platforms/cuda/include/CudaPlatform.h
View file @
f2548616
...
...
@@ -84,8 +84,8 @@ public:
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
static
const
std
::
string
&
UseCpuPme
()
{
static
const
std
::
string
key
=
"UseCpuPme"
;
static
const
std
::
string
&
Cuda
UseCpuPme
()
{
static
const
std
::
string
key
=
"
Cuda
UseCpuPme"
;
return
key
;
}
/**
...
...
platforms/cuda/src/CudaContext.h
View file @
f2548616
...
...
@@ -457,7 +457,7 @@ public:
return
reorderListeners
;
}
/**
* Add a pre-computation that should be called at the very start of force and energy evalu
t
ations.
* Add a pre-computation that should be called at the very start of force and energy evaluations.
* The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPreComputation
(
ForcePreComputation
*
computation
);
...
...
@@ -468,7 +468,7 @@ public:
return
preComputations
;
}
/**
* Add a post-computation that should be called at the very end of force and energy evalu
t
ations.
* Add a post-computation that should be called at the very end of force and energy evaluations.
* The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPostComputation
(
ForcePostComputation
*
computation
);
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
f2548616
...
...
@@ -1337,8 +1337,7 @@ private:
class
CudaCalcNonbondedForceKernel
::
PmeIO
:
public
CalcPmeReciprocalForceKernel
::
IO
{
public:
PmeIO
(
CudaContext
&
cu
,
CUfunction
addForcesKernel
)
:
cu
(
cu
),
addForcesKernel
(
addForcesKernel
),
forceTemp
(
NULL
)
{
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
forceTemp = new CudaArray(cu, cu.getNumAtoms(), elementSize, "PmeForce");
forceTemp
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"PmeForce"
);
}
~
PmeIO
()
{
if
(
forceTemp
!=
NULL
)
...
...
@@ -1570,6 +1569,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
...
...
@@ -1728,7 +1729,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
double
CudaCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
void
*
sumsArgs
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldSumsKernel
,
sumsArgs
,
cosSinSums
->
getSize
());
...
...
@@ -1774,6 +1774,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
f2548616
...
...
@@ -86,15 +86,15 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaDeviceIndex
());
platformProperties
.
push_back
(
CudaDeviceName
());
platformProperties
.
push_back
(
CudaUseBlockingSync
());
platformProperties
.
push_back
(
UseCpuPme
());
platformProperties
.
push_back
(
CudaPrecision
());
platformProperties
.
push_back
(
CudaUseCpuPme
());
platformProperties
.
push_back
(
CudaCompiler
());
platformProperties
.
push_back
(
CudaTempDirectory
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
UseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
Cuda
UseCpuPme
(),
"false"
);
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
...
@@ -143,8 +143,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaUseBlockingSync
())
:
properties
.
find
(
CudaUseBlockingSync
())
->
second
);
string
precisionPropValue
=
(
properties
.
find
(
CudaPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaPrecision
())
:
properties
.
find
(
CudaPrecision
())
->
second
);
string
cpuPmePropValue
=
(
properties
.
find
(
UseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
UseCpuPme
())
:
properties
.
find
(
UseCpuPme
())
->
second
);
string
cpuPmePropValue
=
(
properties
.
find
(
Cuda
UseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
Cuda
UseCpuPme
())
:
properties
.
find
(
Cuda
UseCpuPme
())
->
second
);
const
string
&
compilerPropValue
=
(
properties
.
find
(
CudaCompiler
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaCompiler
())
:
properties
.
find
(
CudaCompiler
())
->
second
);
const
string
&
tempPropValue
=
(
properties
.
find
(
CudaTempDirectory
())
==
properties
.
end
()
?
...
...
@@ -167,7 +167,6 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
useCpuPme
=
(
cpuPmeProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
while
((
nextPos
=
deviceIndexProperty
.
find_first_of
(
", "
,
searchPos
))
!=
string
::
npos
)
{
...
...
@@ -195,11 +194,12 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaPrecision
()]
=
precisionProperty
;
propertyValues
[
CudaPlatform
::
UseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
Cuda
UseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaCompiler
()]
=
compilerProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
...
...
platforms/opencl/include/OpenCLPlatform.h
View file @
f2548616
...
...
@@ -87,17 +87,25 @@ public:
static
const
std
::
string
key
=
"OpenCLPrecision"
;
return
key
;
}
/**
 * Returns the key of the platform property that selects whether the CPU based PME
 * calculation is used.  The same string object is returned on every call.
 */
static const std::string& OpenCLUseCpuPme() {
    static const std::string propertyName("OpenCLUseCpuPme");
    return propertyName;
}
};
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
);
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
ContextImpl
*
context
;
std
::
vector
<
OpenCLContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
removeCM
;
bool
removeCM
,
useCpuPme
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
f2548616
...
...
@@ -334,6 +334,10 @@ OpenCLContext::~OpenCLContext() {
delete
forces
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
delete
reorderListeners
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
preComputations
.
size
();
i
++
)
delete
preComputations
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
postComputations
.
size
();
i
++
)
delete
postComputations
[
i
];
if
(
pinnedBuffer
!=
NULL
)
delete
pinnedBuffer
;
if
(
posq
!=
NULL
)
...
...
@@ -1106,6 +1110,14 @@ void OpenCLContext::addReorderListener(ReorderListener* listener) {
reorderListeners
.
push_back
(
listener
);
}
void OpenCLContext::addPreComputation(ForcePreComputation* computation) {
    // Appended at the back, so pre-computations stay in registration order.
    preComputations.insert(preComputations.end(), computation);
}
void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
    // Appended at the back, so post-computations stay in registration order.
    postComputations.insert(postComputations.end(), computation);
}
struct
OpenCLContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
OpenCLContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/opencl/src/OpenCLContext.h
View file @
f2548616
...
...
@@ -158,6 +158,8 @@ public:
class
WorkTask
;
class
WorkThread
;
class
ReorderListener
;
class
ForcePreComputation
;
class
ForcePostComputation
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
std
::
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
);
...
...
@@ -554,6 +556,28 @@ public:
std
::
vector
<
ReorderListener
*>&
getReorderListeners
()
{
return
reorderListeners
;
}
/**
* Add a pre-computation that should be called at the very start of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPreComputation
(
ForcePreComputation
*
computation
);
/**
* Get the list of ForcePreComputations.
*/
std
::
vector
<
ForcePreComputation
*>&
getPreComputations
()
{
return
preComputations
;
}
/**
* Add a post-computation that should be called at the very end of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void
addPostComputation
(
ForcePostComputation
*
computation
);
/**
* Get the list of ForcePostComputations.
*/
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
return
postComputations
;
}
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
...
...
@@ -625,6 +649,8 @@ private:
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ForcePreComputation
*>
preComputations
;
std
::
vector
<
ForcePostComputation
*>
postComputations
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
...
...
@@ -686,7 +712,7 @@ private:
/**
* This abstract class defines a function to be executed whenever atoms get reordered.
* Objects that need to know when reordering happens should create a
r
eorderListener
* Objects that need to know when reordering happens should create a
R
eorderListener
* and register it by calling addReorderListener().
*/
class
OpenCLContext
::
ReorderListener
{
...
...
@@ -696,6 +722,38 @@ public:
}
};
/**
 * This abstract class defines a function to be executed at the very beginning of force and
 * energy evaluation, before any other calculation has been done.  It is useful for operations
 * that need to be performed at a nonstandard point in the process.  After creating a
 * ForcePreComputation, register it by calling addPreComputation().
 */
class OpenCLContext::ForcePreComputation {
public:
    /**
     * Instances are deleted through a ForcePreComputation pointer, so the destructor
     * must be virtual for subclass members to be destroyed correctly.
     */
    virtual ~ForcePreComputation() {
    }
    /**
     * @param includeForces  true if forces should be computed
     * @param includeEnergy  true if potential energy should be computed
     * @param groups         a set of bit flags for which force groups to include
     */
    virtual void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
};
/**
 * This abstract class defines a function to be executed at the very end of force and
 * energy evaluation, after all other calculations have been done.  It is useful for operations
 * that need to be performed at a nonstandard point in the process.  After creating a
 * ForcePostComputation, register it by calling addPostComputation().
 */
class OpenCLContext::ForcePostComputation {
public:
    /**
     * Instances are deleted through a ForcePostComputation pointer, so the destructor
     * must be virtual for subclass members to be destroyed correctly.
     */
    virtual ~ForcePostComputation() {
    }
    /**
     * @param includeForces  true if forces should be computed
     * @param includeEnergy  true if potential energy should be computed
     * @param groups         a set of bit flags for which force groups to include
     * @return an optional contribution to add to the potential energy.
     */
    virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
};
}
// namespace OpenMM
#endif
/*OPENMM_OPENCLCONTEXT_H_*/
platforms/opencl/src/OpenCLKernels.cpp
View file @
f2548616
...
...
@@ -104,10 +104,12 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void
OpenCLCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cl
.
clearAutoclearBuffers
();
for
(
vector
<
OpenCLContext
::
ForcePreComputation
*>::
iterator
iter
=
cl
.
getPreComputations
().
begin
();
iter
!=
cl
.
getPreComputations
().
end
();
++
iter
)
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
nb
.
prepareInteractions
();
}
...
...
@@ -117,8 +119,10 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
if
((
groups
&
(
1
<<
cl
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cl
.
getNonbondedUtilities
().
computeInteractions
();
cl
.
reduceForces
();
double
sum
=
0.0
;
for
(
vector
<
OpenCLContext
::
ForcePostComputation
*>::
iterator
iter
=
cl
.
getPostComputations
().
begin
();
iter
!=
cl
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
cl
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
double
sum
=
0.0
f
;
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
...
...
@@ -1323,6 +1327,58 @@ private:
const
NonbondedForce
&
force
;
};
/**
 * IO implementation handed to the CalcPmeReciprocalForceKernel.  It copies the context's
 * posq array to host memory on request and adds forces supplied by the caller into the
 * context's force buffer by way of a temporary device array.
 */
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
    /**
     * @param cl               the context whose posq and force arrays are accessed
     * @param addForcesKernel  the kernel launched by setForce(); argument 0 is bound here
     *                         to the temporary force array
     */
    PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) :
            cl(cl),
            forceTemp(OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "PmeForce")),
            addForcesKernel(addForcesKernel) {
        // Within this body "addForcesKernel" names the constructor argument, which hides the member.
        addForcesKernel.setArg<cl::Buffer>(0, forceTemp->getDeviceBuffer());
    }
    ~PmeIO() {
        // Deleting a null pointer is a no-op, so no explicit check is required.
        delete forceTemp;
    }
    /**
     * Download the context's posq array into host storage owned by this object.
     *
     * @return pointer to the first float of the downloaded data
     */
    float* getPosq() {
        cl.getPosq().download(posq);
        mm_float4* firstElement = &posq[0];
        return reinterpret_cast<float*>(firstElement);
    }
    /**
     * Upload the given forces to the temporary array, then launch the add-forces kernel
     * with the context's force buffer bound as argument 1.
     *
     * @param force  host data to upload into the temporary force array
     */
    void setForce(float* force) {
        forceTemp->upload(force);
        addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
        cl.executeKernel(addForcesKernel, cl.getNumAtoms());
    }
private:
    OpenCLContext& cl;
    vector<mm_float4> posq;
    OpenCLArray* forceTemp;
    cl::Kernel addForcesKernel;
};
/**
 * ForcePreComputation that calls beginComputation() on the PME reciprocal space kernel
 * at the start of each force/energy evaluation.
 */
class OpenCLCalcNonbondedForceKernel::PmePreComputation : public OpenCLContext::ForcePreComputation {
public:
    /**
     * @param cl   the context whose periodic box size is read
     * @param pme  the Kernel to invoke as a CalcPmeReciprocalForceKernel (a copy of the handle is stored)
     * @param io   the IO object passed to the kernel; held by reference
     */
    PmePreComputation(OpenCLContext& cl, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) :
            cl(cl),
            pme(pme),
            io(io) {
    }
    /**
     * Start the reciprocal space calculation using the context's current periodic box size.
     * includeForces and groups are not consulted.
     */
    void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        Vec3 boxSize(
                cl.getPeriodicBoxSize().x,
                cl.getPeriodicBoxSize().y,
                cl.getPeriodicBoxSize().z);
        pme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, boxSize, includeEnergy);
    }
private:
    OpenCLContext& cl;
    Kernel pme;
    CalcPmeReciprocalForceKernel::IO& io;
};
/**
 * ForcePostComputation that calls finishComputation() on the PME reciprocal space kernel
 * at the end of each force/energy evaluation.
 */
class OpenCLCalcNonbondedForceKernel::PmePostComputation : public OpenCLContext::ForcePostComputation {
public:
    /**
     * @param pme  the Kernel to invoke as a CalcPmeReciprocalForceKernel (a copy of the handle is stored)
     * @param io   the IO object passed to the kernel; held by reference
     */
    PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) :
            pme(pme),
            io(io) {
    }
    /**
     * Finish the reciprocal space calculation.  None of the three arguments is consulted.
     *
     * @return the value returned by the kernel's finishComputation()
     */
    double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        const double reciprocalResult = pme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
        return reciprocalResult;
    }
private:
    Kernel pme;
    CalcPmeReciprocalForceKernel::IO& io;
};
OpenCLCalcNonbondedForceKernel
::~
OpenCLCalcNonbondedForceKernel
()
{
if
(
sigmaEpsilon
!=
NULL
)
delete
sigmaEpsilon
;
...
...
@@ -1350,6 +1406,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete
sort
;
if
(
fft
!=
NULL
)
delete
fft
;
if
(
pmeio
!=
NULL
)
delete
pmeio
;
}
void
OpenCLCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -1430,7 +1488,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
dispersionCoefficient
=
0.0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cl
.
getContextIndex
()
==
0
)
{
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
...
...
@@ -1438,7 +1496,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
...
...
@@ -1454,7 +1512,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cl
.
getContextIndex
()
==
0
)
{
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
...
@@ -1465,7 +1523,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
...
...
@@ -1476,92 +1534,109 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
if
(
deviceIsCpu
)
pmeDefines
[
"DEVICE_IS_CPU"
]
=
"1"
;
// Create required data structures.
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
if
(
cl
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cl
.
getPlatformData
().
context
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
cl
::
Kernel
addForcesKernel
=
cl
::
Kernel
(
program
,
"addForces"
);
pmeio
=
new
PmeIO
(
cl
,
addForcesKernel
);
cl
.
addPreComputation
(
new
PmePreComputation
(
cl
,
cpuPme
,
*
pmeio
));
cl
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
if
(
cl
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
if
(
pmeio
==
NULL
)
{
// Create required data structures.
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
else
{
vector
<
float
>
modulif
(
ndata
);
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
{
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
if
(
cl
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
}
...
...
@@ -1650,7 +1725,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
}
if
(
cosSinSums
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
recipBoxSize
=
mm_double4
(
2
*
M_PI
/
boxSize
.
x
,
2
*
M_PI
/
boxSize
.
y
,
2
*
M_PI
/
boxSize
.
z
,
0.0
);
double
recipCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
...
...
@@ -1669,7 +1744,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
if
(
pmeGrid
!=
NULL
&&
cl
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
f2548616
...
...
@@ -557,7 +557,7 @@ public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
{
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
,
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -596,6 +596,9 @@ private:
const
char
*
getMaxValue
()
const
{
return
"(int2) (INT_MAX, INT_MAX)"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
class
PmeIO
;
class
PmePreComputation
;
class
PmePostComputation
;
OpenCLContext
&
cl
;
bool
hasInitializedKernel
;
OpenCLArray
*
sigmaEpsilon
;
...
...
@@ -611,6 +614,8 @@ private:
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLSort
*
sort
;
OpenCLFFT3D
*
fft
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
f2548616
...
...
@@ -31,6 +31,7 @@
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include <algorithm>
#include <sstream>
using
namespace
OpenMM
;
...
...
@@ -78,11 +79,13 @@ OpenCLPlatform::OpenCLPlatform() {
platformProperties
.
push_back
(
OpenCLPlatformIndex
());
platformProperties
.
push_back
(
OpenCLPlatformName
());
platformProperties
.
push_back
(
OpenCLPrecision
());
platformProperties
.
push_back
(
OpenCLUseCpuPme
());
setPropertyDefaultValue
(
OpenCLDeviceIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLDeviceName
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformName
(),
""
);
setPropertyDefaultValue
(
OpenCLPrecision
(),
"single"
);
setPropertyDefaultValue
(
OpenCLUseCpuPme
(),
"false"
);
}
double
OpenCLPlatform
::
getSpeed
()
const
{
...
...
@@ -112,7 +115,15 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
getPropertyDefaultValue
(
OpenCLDeviceIndex
())
:
properties
.
find
(
OpenCLDeviceIndex
())
->
second
);
string
precisionPropValue
=
(
properties
.
find
(
OpenCLPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLPrecision
())
:
properties
.
find
(
OpenCLPrecision
())
->
second
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
));
string
cpuPmePropValue
=
(
properties
.
find
(
OpenCLUseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLUseCpuPme
())
:
properties
.
find
(
OpenCLUseCpuPme
())
->
second
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
));
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -121,7 +132,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
int
platformIndex
=
0
;
if
(
platformPropValue
.
length
()
>
0
)
stringstream
(
platformPropValue
)
>>
platformIndex
;
...
...
@@ -150,6 +161,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
deviceIndex
<<
contexts
[
i
]
->
getDeviceIndex
();
deviceName
<<
contexts
[
i
]
->
getDevice
().
getInfo
<
CL_DEVICE_NAME
>
();
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
contexts
[
0
]
->
intToString
(
platformIndex
);
...
...
@@ -157,6 +169,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
cl
::
Platform
::
get
(
&
platforms
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformName
()]
=
platforms
[
platformIndex
].
getInfo
<
CL_PLATFORM_NAME
>
();
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
propertyValues
[
OpenCLPlatform
::
OpenCLUseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
}
...
...
platforms/opencl/src/kernels/pme.cl
View file @
f2548616
...
...
@@ -391,3 +391,8 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
forceBuffers[atom]
=
totalForce
;
}
}
/**
 * Accumulate one force array into another, element by element.
 *
 * Each work-item begins at its own global id and advances by the global
 * work size until it reaches NUM_ATOMS (a symbol defined outside this kernel).
 *
 * @param forces        the per-atom values to add (read only)
 * @param forceBuffers  the per-atom values that are incremented in place
 */
__kernel void addForces(__global const real4* restrict forces, __global real4* restrict forceBuffers) {
    int atom = get_global_id(0);
    while (atom < NUM_ATOMS) {
        forceBuffers[atom] += forces[atom];
        atom += get_global_size(0);
    }
}
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
f2548616
...
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
()
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
));
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
,
"false"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
f2548616
...
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
));
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
,
"false"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
f2548616
...
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
));
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
,
"false"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
plugins/cpupme/src/CpuPmeKernelFactory.cpp
View file @
f2548616
/* -------------------------------------------------------------------------- *
* OpenMM
Amoeba
*
* OpenMM
CpuPme
*
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
f2548616
...
...
@@ -514,11 +514,6 @@ CpuCalcPmeReciprocalForceKernel::~CpuCalcPmeReciprocalForceKernel() {
}
}
#include <sys/time.h>
/**
 * Compute the time elapsed between two timeval samples.
 *
 * @param t1  the earlier sample
 * @param t2  the later sample
 * @return t2 minus t1, expressed in microseconds (negative if t2 precedes t1)
 */
double diff(struct timeval t1, struct timeval t2) {
    // Whole-second part of the difference, scaled to microseconds.
    const double secondsAsMicros = 1e6*(t2.tv_sec-t1.tv_sec);
    // Add the difference of the sub-second microsecond fields.
    return secondsAsMicros+(t2.tv_usec-t1.tv_usec);
}
void
CpuCalcPmeReciprocalForceKernel
::
runThread
(
int
index
)
{
if
(
index
==
-
1
)
{
// This is the main thread that coordinates all the other ones.
...
...
@@ -531,26 +526,17 @@ void CpuCalcPmeReciprocalForceKernel::runThread(int index) {
if
(
isDeleted
)
break
;
posq
=
io
->
getPosq
();
struct
timeval
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
;
gettimeofday
(
&
t1
,
NULL
);
advanceThreads
();
// Signal threads to perform charge spreading.
advanceThreads
();
// Signal threads to sum the charge grids.
gettimeofday
(
&
t2
,
NULL
);
fftwf_execute_dft_r2c
(
forwardFFT
,
realGrid
,
complexGrid
);
gettimeofday
(
&
t3
,
NULL
);
if
(
lastBoxSize
!=
periodicBoxSize
)
advanceThreads
();
// Signal threads to compute the reciprocal scale factors.
if
(
includeEnergy
)
advanceThreads
();
// Signal threads to compute energy.
gettimeofday
(
&
t4
,
NULL
);
advanceThreads
();
// Signal threads to perform reciprocal convolution.
gettimeofday
(
&
t5
,
NULL
);
fftwf_execute_dft_c2r
(
backwardFFT
,
complexGrid
,
realGrid
);
gettimeofday
(
&
t6
,
NULL
);
advanceThreads
();
// Signal threads to interpolate forces.
isFinished
=
true
;
gettimeofday
(
&
t7
,
NULL
);
printf
(
"time %g %g %g %g %g %g
\n
"
,
diff
(
t1
,
t2
),
diff
(
t2
,
t3
),
diff
(
t3
,
t4
),
diff
(
t4
,
t5
),
diff
(
t5
,
t6
),
diff
(
t6
,
t7
));
lastBoxSize
=
periodicBoxSize
;
pthread_cond_signal
(
&
mainThreadEndCondition
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment