Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7bfb75c7
Commit
7bfb75c7
authored
Jul 20, 2013
by
Lee-Ping Wang
Browse files
Merge branch 'master' of github.com:SimTk/openmm
parents
a85d163b
49cb91c5
Changes
27
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
72 additions
and
31 deletions
+72
-31
cmake_modules/FindFFTW.cmake
cmake_modules/FindFFTW.cmake
+0
-1
examples/testInstallation.py
examples/testInstallation.py
+1
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+4
-0
platforms/cuda/src/kernels/langevin.cu
platforms/cuda/src/kernels/langevin.cu
+11
-0
platforms/cuda/src/kernels/verlet.cu
platforms/cuda/src/kernels/verlet.cu
+9
-0
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+3
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+4
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+7
-8
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+4
-1
platforms/opencl/src/kernels/langevin.cl
platforms/opencl/src/kernels/langevin.cl
+2
-1
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+2
-0
platforms/opencl/src/kernels/verlet.cl
platforms/opencl/src/kernels/verlet.cl
+2
-1
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
+1
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
...ins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
+1
-1
plugins/cpupme/CMakeLists.txt
plugins/cpupme/CMakeLists.txt
+4
-1
plugins/cpupme/src/CpuPmeKernelFactory.cpp
plugins/cpupme/src/CpuPmeKernelFactory.cpp
+2
-2
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+8
-6
plugins/cpupme/tests/TestCpuPme.cpp
plugins/cpupme/tests/TestCpuPme.cpp
+4
-0
plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
+2
-2
No files found.
cmake_modules/FindFFTW.cmake
View file @
7bfb75c7
...
...
@@ -20,6 +20,5 @@ find_library (FFTW_THREADS_LIBRARY NAMES fftw3f_threads)
# all listed variables are TRUE
include
(
FindPackageHandleStandardArgs
)
find_package_handle_standard_args
(
FFTW DEFAULT_MSG FFTW_LIBRARY FFTW_INCLUDES
)
find_package_handle_standard_args
(
FFTW_THREADS DEFAULT_MSG FFTW_THREADS_LIBRARY FFTW_INCLUDES
)
mark_as_advanced
(
FFTW_LIBRARY FFTW_THREADS_LIBRARY FFTW_INCLUDES
)
examples/testInstallation.py
View file @
7bfb75c7
...
...
@@ -30,6 +30,7 @@ for i in range(numPlatforms):
simulation
=
Simulation
(
pdb
.
topology
,
system
,
integrator
,
platform
)
simulation
.
context
.
setPositions
(
pdb
.
positions
)
forces
[
i
]
=
simulation
.
context
.
getState
(
getForces
=
True
).
getForces
()
del
simulation
print
"- Successfully computed forces"
except
:
print
"- Error computing forces with"
,
platform
.
getName
(),
"platform"
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
7bfb75c7
...
...
@@ -5222,6 +5222,10 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
setTime
(
cu
.
getTime
()
+
integrator
.
getStepSize
());
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
if
(
cu
.
getAtomsWereReordered
())
{
forcesAreValid
=
false
;
validSavedForces
.
clear
();
}
}
double
CudaIntegrateCustomStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
...
...
platforms/cuda/src/kernels/langevin.cu
View file @
7bfb75c7
...
...
@@ -32,7 +32,12 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
*/
extern
"C"
__global__
void
integrateLangevinPart2
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
#if __CUDA_ARCH__ >= 130
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
#else
float
invStepSize
=
1.0
f
/
dt
[
0
].
y
;
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
index
<
NUM_ATOMS
)
{
mixed4
vel
=
velm
[
index
];
...
...
@@ -48,9 +53,15 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#if __CUDA_ARCH__ >= 130
vel
.
x
=
(
mixed
)
(
invStepSize
*
delta
.
x
);
vel
.
y
=
(
mixed
)
(
invStepSize
*
delta
.
y
);
vel
.
z
=
(
mixed
)
(
invStepSize
*
delta
.
z
);
#else
vel
.
x
=
invStepSize
*
delta
.
x
+
correction
*
delta
.
x
;
vel
.
y
=
invStepSize
*
delta
.
y
+
correction
*
delta
.
x
;
vel
.
z
=
invStepSize
*
delta
.
z
+
correction
*
delta
.
x
;
#endif
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
...
...
platforms/cuda/src/kernels/verlet.cu
View file @
7bfb75c7
...
...
@@ -37,7 +37,12 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
extern
"C"
__global__
void
integrateVerletPart2
(
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
mixed2
stepSize
=
dt
[
0
];
#if __CUDA_ARCH__ >= 130
double
oneOverDt
=
1.0
/
stepSize
.
y
;
#else
float
oneOverDt
=
1.0
f
/
stepSize
.
y
;
float
correction
=
(
1.0
f
-
oneOverDt
*
stepSize
.
y
)
/
stepSize
.
y
;
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
...
...
@@ -55,7 +60,11 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#if __CUDA_ARCH__ >= 130
velocity
=
make_mixed4
((
mixed
)
(
delta
.
x
*
oneOverDt
),
(
mixed
)
(
delta
.
y
*
oneOverDt
),
(
mixed
)
(
delta
.
z
*
oneOverDt
),
velocity
.
w
);
#else
velocity
=
make_mixed4
((
mixed
)
(
delta
.
x
*
oneOverDt
+
delta
.
x
*
correction
),
(
mixed
)
(
delta
.
y
*
oneOverDt
+
delta
.
y
*
correction
),
(
mixed
)
(
delta
.
z
*
oneOverDt
+
delta
.
z
*
correction
),
velocity
.
w
);
#endif
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
7bfb75c7
...
...
@@ -97,7 +97,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Try to figure out which device is the fastest.
int
bestSpeed
=
-
1
;
bool
bestSupportsDouble
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
if
(
platformVendor
==
"Apple"
&&
devices
[
i
].
getInfo
<
CL_DEVICE_VENDOR
>
()
==
"AMD"
)
continue
;
// Don't use AMD GPUs on OS X due to serious bugs.
...
...
@@ -136,11 +135,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
}
int
speed
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
()
*
processingElementsPerComputeUnit
*
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_CLOCK_FREQUENCY
>
();
bool
supportsDouble
=
(
devices
[
i
].
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_fp64"
)
!=
string
::
npos
);
if
(
maxSize
>=
minThreadBlockSize
&&
speed
>
bestSpeed
&&
(
supportsDouble
||
!
bestSupportsDouble
))
{
if
(
maxSize
>=
minThreadBlockSize
&&
speed
>
bestSpeed
)
{
deviceIndex
=
i
;
bestSpeed
=
speed
;
bestSupportsDouble
=
supportsDouble
;
}
}
}
...
...
@@ -277,6 +274,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFiveBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearSixBuffers"
);
reduceReal4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceReal4Buffer"
);
if
(
supports64BitGlobalAtomics
)
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
7bfb75c7
...
...
@@ -5444,6 +5444,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl
.
setTime
(
cl
.
getTime
()
+
integrator
.
getStepSize
());
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
if
(
cl
.
getAtomsWereReordered
())
{
forcesAreValid
=
false
;
validSavedForces
.
clear
();
}
// Reduce UI lag.
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
7bfb75c7
...
...
@@ -81,7 +81,7 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
}
else
{
numForceThreadBlocks
=
context
.
getNumThreadBlocks
();
forceThreadBlockSize
=
OpenCLContext
::
ThreadBlockSize
;
forceThreadBlockSize
=
(
context
.
getSIMDWidth
()
>=
32
?
OpenCLContext
::
ThreadBlockSize
:
32
)
;
if
(
context
.
getSupports64BitGlobalAtomics
())
{
// Even though using longForceBuffer, still need a single forceBuffer for the reduceForces kernel to convert the long results into float4 which will be used by later kernels.
numForceBuffers
=
1
;
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
7bfb75c7
...
...
@@ -70,7 +70,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
*
to
global
memory.
*/
void
storeInteractionData
(
unsigned
short
x,
unsigned
short*
buffer,
int*
atoms,
int*
numAtoms,
int
numValid,
__global
unsigned
int*
interactionCount,
__global
ushort2
*
interactingTiles,
__global
unsigned
int*
interactingAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
int
*
interactingTiles,
__global
unsigned
int*
interactingAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
real4*
posq,
real4
blockCenterX,
real4
blockSizeX,
unsigned
int
maxTiles,
bool
finish
)
{
real4
posBuffer[TILE_SIZE]
;
const
bool
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
PADDED_CUTOFF
&&
...
...
@@ -128,7 +128,7 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
int
baseIndex
=
atom_add
(
interactionCount,
tilesToStore
)
;
if
(
baseIndex+tilesToStore
<=
maxTiles
)
{
for
(
int
i
=
0
; i < tilesToStore; i++) {
interactingTiles[baseIndex+i]
=
(
ushort2
)
(
x,
singlePeriodicCopy
)
;
interactingTiles[baseIndex+i]
=
x
;
for
(
int
j
=
0
; j < TILE_SIZE; j++)
interactingAtoms[
(
baseIndex+i
)
*TILE_SIZE+j]
=
atoms[i*TILE_SIZE+j]
;
}
...
...
@@ -145,7 +145,7 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
int
baseIndex
=
atom_add
(
interactionCount,
tilesToStore
)
;
if
(
baseIndex+tilesToStore
<=
maxTiles
)
{
for
(
int
i
=
0
; i < tilesToStore; i++) {
interactingTiles[baseIndex+i]
=
(
ushort2
)
(
x,
singlePeriodicCopy
)
;
interactingTiles[baseIndex+i]
=
x
;
for
(
int
j
=
0
; j < TILE_SIZE; j++) {
int
index
=
i*TILE_SIZE+j
;
interactingAtoms[
(
baseIndex+i
)
*TILE_SIZE+j]
=
(
index
<
*numAtoms
?
atoms[index]
:
NUM_ATOMS
)
;
...
...
@@ -159,9 +159,8 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
*
Compare
the
bounding
boxes
for
each
pair
of
blocks.
If
they
are
sufficiently
far
apart,
*
mark
them
as
non-interacting.
*/
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionCount,
__global
ushort2*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
unsigned
int
numBlocks,
__global
real2*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
int*
restrict
rebuildNeighborList
)
{
...
...
@@ -180,8 +179,8 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
numAtoms
=
0
;
real2
sortedKey
=
sortedBlocks[i]
;
unsigned
short
x
=
(
unsigned
short
)
sortedKey.y
;
real4
blockCenterX
=
b
lockCenter[
x
]
;
real4
blockSizeX
=
b
lockBoundingBox[
x
]
;
real4
blockCenterX
=
sortedB
lockCenter[
i
]
;
real4
blockSizeX
=
sortedB
lockBoundingBox[
i
]
;
//
Load
exclusion
data
for
block
x.
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
7bfb75c7
...
...
@@ -424,6 +424,7 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].z = posq1.z;
localData[localAtomIndex].q = posq1.w;
localData[get_local_id(0)].bornRadius = bornRadius1;
SYNC_WARPS;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
...
...
@@ -454,8 +455,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
}
#endif
SYNC_WARPS;
}
SYNC_WARPS;
}
}
else {
...
...
@@ -472,6 +473,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].fy = 0.0f;
localData[get_local_id(0)].fz = 0.0f;
localData[get_local_id(0)].fw = 0.0f;
SYNC_WARPS;
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
...
...
@@ -627,6 +629,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
}
SYNC_WARPS
;
#
ifdef
USE_PERIODIC
if
(
singlePeriodicCopy
)
{
//
The
box
is
small
enough
that
we
can
just
translate
all
the
atoms
into
a
single
periodic
...
...
platforms/opencl/src/kernels/langevin.cl
View file @
7bfb75c7
...
...
@@ -36,6 +36,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
double
invStepSize
=
1.0/dt[0].y
;
#
else
float
invStepSize
=
1.0f/dt[0].y
;
float
correction
=
(
1.0f-invStepSize*dt[0].y
)
/dt[0].y
;
#
endif
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
...
...
@@ -53,7 +54,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
#
ifdef
SUPPORTS_DOUBLE_PRECISION
vel.xyz
=
convert_mixed4
(
invStepSize*convert_double4
(
delta
))
.
xyz
;
#
else
vel.xyz
=
invStepSize*delta.xyz
;
vel.xyz
=
invStepSize*delta.xyz
+
correction*delta.xyz
;
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
7bfb75c7
...
...
@@ -81,6 +81,7 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize,
}
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
/**
*
Sum
the
various
buffers
containing
forces.
*/
...
...
@@ -94,6 +95,7 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
buffer[index]
=
sum
;
}
}
#
endif
/**
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
...
...
platforms/opencl/src/kernels/verlet.cl
View file @
7bfb75c7
...
...
@@ -38,6 +38,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
double
oneOverDt
=
1.0/stepSize.y
;
#
else
float
oneOverDt
=
1.0f/stepSize.y
;
float
correction
=
(
1.0f-oneOverDt*stepSize.y
)
/stepSize.y
;
#
endif
if
(
get_global_id
(
0
)
==
0
)
dt[0].x
=
stepSize.y
;
...
...
@@ -58,7 +59,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
#
ifdef
SUPPORTS_DOUBLE_PRECISION
velocity.xyz
=
convert_mixed4
(
convert_double4
(
delta
)
*oneOverDt
)
.
xyz
;
#
else
velocity.xyz
=
delta.xyz*oneOverDt
;
velocity.xyz
=
delta.xyz*oneOverDt
+
delta.xyz*correction
;
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
...
...
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
View file @
7bfb75c7
...
...
@@ -455,7 +455,7 @@ void testSwitchingFunction() {
positions
[
1
]
=
Vec3
(
r
+
delta
,
0
,
0
);
context
.
setPositions
(
positions
);
double
e2
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
ASSERT_EQUAL_TOL
((
e2
-
e1
)
/
(
2
*
delta
),
state
.
getForces
()[
0
][
0
],
1
e-3
);
ASSERT_EQUAL_TOL
((
e2
-
e1
)
/
(
2
*
delta
),
state
.
getForces
()[
0
][
0
],
2
e-3
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
View file @
7bfb75c7
...
...
@@ -33,7 +33,7 @@
using
namespace
OpenMM
;
extern
"C"
void
registerPlatforms
()
{
extern
"C"
OPENMM_EXPORT
void
registerPlatforms
()
{
}
extern
"C"
OPENMM_EXPORT
void
registerKernelFactories
()
{
...
...
plugins/cpupme/CMakeLists.txt
View file @
7bfb75c7
...
...
@@ -85,7 +85,10 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
ELSE
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
${
FFTW_LIBRARY
}
${
FFTW_THREADS_LIBRARY
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
${
FFTW_LIBRARY
}
)
IF
(
FFTW_THREADS_LIBRARY
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
FFTW_THREADS_LIBRARY
}
)
ENDIF
(
FFTW_THREADS_LIBRARY
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_PME_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
...
...
plugins/cpupme/src/CpuPmeKernelFactory.cpp
View file @
7bfb75c7
...
...
@@ -32,10 +32,10 @@
using
namespace
OpenMM
;
extern
"C"
void
registerPlatforms
()
{
extern
"C"
OPENMM_EXPORT_PME
void
registerPlatforms
()
{
}
extern
"C"
void
registerKernelFactories
()
{
extern
"C"
OPENMM_EXPORT_PME
void
registerKernelFactories
()
{
if
(
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
())
{
CpuPmeKernelFactory
*
factory
=
new
CpuPmeKernelFactory
();
for
(
int
i
=
0
;
i
<
Platform
::
getNumPlatforms
();
i
++
)
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
7bfb75c7
...
...
@@ -119,8 +119,9 @@ static void spreadCharge(int start, int end, float* posq, float* grid, int gridx
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
// Find the position relative to the nearest grid point.
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
))));
__m128
pos
=
_mm_loadu_ps
(
&
posq
[
4
*
i
]);
__m128
posFloor
=
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
));
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
posFloor
));
__m128
t
=
_mm_mul_ps
(
_mm_mul_ps
(
posInBox
,
invBoxSize
),
gridSize
);
__m128i
ti
=
_mm_cvttps_epi32
(
t
);
__m128
dr
=
_mm_sub_ps
(
t
,
_mm_cvtepi32_ps
(
ti
));
...
...
@@ -309,8 +310,9 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
// Find the position relative to the nearest grid point.
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
))));
__m128
pos
=
_mm_loadu_ps
(
&
posq
[
4
*
i
]);
__m128
posFloor
=
_mm_floor_ps
(
_mm_mul_ps
(
pos
,
invBoxSize
));
__m128
posInBox
=
_mm_sub_ps
(
pos
,
_mm_mul_ps
(
boxSize
,
posFloor
));
__m128
t
=
_mm_mul_ps
(
_mm_mul_ps
(
posInBox
,
invBoxSize
),
gridSize
);
__m128i
ti
=
_mm_cvttps_epi32
(
t
);
__m128
dr
=
_mm_sub_ps
(
t
,
_mm_cvtepi32_ps
(
ti
));
...
...
@@ -638,11 +640,11 @@ int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
// Attempt to factor the current value.
int
unfactored
=
minimum
;
for
(
int
factor
=
2
;
factor
<
12
;
factor
++
)
{
for
(
int
factor
=
2
;
factor
<
8
;
factor
++
)
{
while
(
unfactored
>
1
&&
unfactored
%
factor
==
0
)
unfactored
/=
factor
;
}
if
(
unfactored
==
1
)
if
(
unfactored
==
1
||
unfactored
==
11
||
unfactored
==
13
)
return
minimum
;
minimum
++
;
}
...
...
plugins/cpupme/tests/TestCpuPme.cpp
View file @
7bfb75c7
...
...
@@ -124,6 +124,10 @@ void testPME() {
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
!
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
())
{
cout
<<
"CPU is not supported. Exiting."
<<
endl
;
return
0
;
}
testPME
();
}
catch
(
const
exception
&
e
)
{
...
...
plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
View file @
7bfb75c7
...
...
@@ -34,10 +34,10 @@
using
namespace
OpenMM
;
extern
"C"
void
registerPlatforms
()
{
extern
"C"
OPENMM_EXPORT
void
registerPlatforms
()
{
}
extern
"C"
void
registerKernelFactories
()
{
extern
"C"
OPENMM_EXPORT
void
registerKernelFactories
()
{
try
{
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
CudaDrudeKernelFactory
*
factory
=
new
CudaDrudeKernelFactory
();
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment