Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f1991b3c
Commit
f1991b3c
authored
May 06, 2014
by
peastman
Browse files
Fixed bug using multiple devices with PME
parent
5ed9dd65
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
296 additions
and
286 deletions
+296
-286
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+155
-151
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+6
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+129
-125
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+6
-5
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
f1991b3c
...
@@ -1520,7 +1520,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1520,7 +1520,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
else
else
dispersionCoefficient
=
0.0
;
dispersionCoefficient
=
0.0
;
alpha
=
0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cu
.
getContextIndex
()
==
0
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
// Compute the Ewald parameters.
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
int
kmaxx
,
kmaxy
,
kmaxz
;
...
@@ -1528,26 +1528,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1528,26 +1528,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
if
(
cu
.
getContextIndex
()
==
0
)
{
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
// Create the reciprocal space kernels.
map
<
string
,
string
>
replacements
;
replacements
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
map
<
string
,
string
>
replacements
;
replacements
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
replacements
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
replacements
[
"KMAX_X"
]
=
cu
.
intToString
(
kmaxx
);
replacements
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
replacements
[
"KMAX_Y"
]
=
cu
.
intToString
(
kmaxy
);
replacements
[
"KMAX_X"
]
=
cu
.
intToString
(
kmaxx
);
replacements
[
"KMAX_Z"
]
=
cu
.
intToString
(
kmaxz
);
replacements
[
"KMAX_Y"
]
=
cu
.
intToString
(
kmaxy
);
replacements
[
"EXP_COEFFICIENT"
]
=
cu
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
replacements
[
"KMAX_Z"
]
=
cu
.
intToString
(
kmaxz
);
replacements
[
"ONE_4PI_EPS0"
]
=
cu
.
doubleToString
(
ONE_4PI_EPS0
);
replacements
[
"EXP_COEFFICIENT"
]
=
cu
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
replacements
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
replacements
[
"ONE_4PI_EPS0"
]
=
cu
.
doubleToString
(
ONE_4PI_EPS0
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
ewald
,
replacements
);
replacements
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
ewaldSumsKernel
=
cu
.
getKernel
(
module
,
"calculateEwaldCosSinSums"
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
ewald
,
replacements
);
ewaldForcesKernel
=
cu
.
getKernel
(
module
,
"calculateEwaldForces"
);
ewaldSumsKernel
=
cu
.
getKernel
(
module
,
"calculateEwaldCosSinSums"
);
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double2
)
:
sizeof
(
float2
));
ewaldForcesKernel
=
cu
.
getKernel
(
module
,
"calculateEwaldForces"
);
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double2
)
:
sizeof
(
float2
));
}
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cu
.
getContextIndex
()
==
0
)
{
}
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
// Compute the PME parameters.
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
@@ -1560,140 +1562,142 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1560,140 +1562,142 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
if
(
cu
.
getContextIndex
()
==
0
)
{
pmeDefines
[
"PME_ORDER"
]
=
cu
.
intToString
(
PmeOrder
);
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
pmeDefines
[
"PME_ORDER"
]
=
cu
.
intToString
(
PmeOrder
);
pmeDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pmeDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cu
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pmeDefines
[
"GRID_SIZE_X"
]
=
cu
.
intToString
(
gridSizeX
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cu
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_Y"
]
=
cu
.
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_X"
]
=
cu
.
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
cu
.
intToString
(
gridSizeZ
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
cu
.
intToString
(
gridSizeY
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
cu
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
pmeDefines
[
"GRID_SIZE_Z"
]
=
cu
.
intToString
(
gridSizeZ
);
pmeDefines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
cu
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
if
(
cu
.
getUseDoublePrecision
())
pmeDefines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
if
(
cu
.
getUseDoublePrecision
())
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
// Create the CPU PME kernel.
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
try
{
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
CUfunction
addForcesKernel
=
cu
.
getKernel
(
module
,
"addForces"
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
CUfunction
addForcesKernel
=
cu
.
getKernel
(
module
,
"addForces"
);
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
}
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
catch
(
OpenMMException
&
ex
)
{
}
// The CPU PME plugin isn't available.
catch
(
OpenMMException
&
ex
)
{
}
// The CPU PME plugin isn't available.
}
}
if
(
pmeio
==
NULL
)
{
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cu
.
getKernel
(
module
,
"gridInterpolateForce"
);
pmeEvalEnergyKernel
=
cu
.
getKernel
(
module
,
"gridEvaluateEnergy"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
// Create required data structures.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
elementSize
:
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
(
gridSizeZ
/
2
+
1
),
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
}
if
(
pmeio
==
NULL
)
{
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cu
.
getKernel
(
module
,
"gridInterpolateForce"
);
pmeEvalEnergyKernel
=
cu
.
getKernel
(
module
,
"gridEvaluateEnergy"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
// Create required data structures.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
elementSize
:
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
(
gridSizeZ
/
2
+
1
),
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
0
]
=
-
data
[
0
];
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
double
div
=
1.0
/
(
PmeOrder
-
1
);
d
data
[
i
]
=
data
[
i
-
1
]
-
data
[
i
]
;
data
[
PmeOrder
-
1
]
=
0.0
;
double
div
=
1.0
/
(
PmeOrder
-
1
);
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
])
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
0
]
=
div
*
data
[
0
];
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
data
[
0
]
=
div
*
data
[
0
]
;
bsplines_
data
[
i
]
=
0.0
;
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
for
(
int
i
=
1
;
i
<
=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
bsplines_data
[
i
]
=
data
[
i
-
1
]
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
vector
<
double
>
moduli
(
ndata
);
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
vector
<
double
>
moduli
(
ndata
)
;
double
sc
=
0.0
;
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
ss
=
0.0
;
double
sc
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
ss
=
0.0
;
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
)
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
}
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
)
;
moduli
[
i
]
=
sc
*
sc
+
ss
*
ss
;
}
}
moduli
[
i
]
=
sc
*
sc
+
ss
*
ss
;
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
;
if
(
cu
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
moduli
[
i
]
<
1.0e-7
)
if
(
dim
==
0
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
;
pmeBsplineModuliX
->
upload
(
modulif
);
if
(
cu
.
getUseDoublePrecision
())
{
else
if
(
dim
==
1
)
if
(
dim
==
0
)
pmeBsplineModuliY
->
upload
(
modulif
);
pmeBsplineModuliX
->
upload
(
moduli
);
else
else
if
(
dim
==
1
)
pmeBsplineModuliZ
->
upload
(
modulif
);
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
}
}
}
}
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
f1991b3c
...
@@ -748,7 +748,7 @@ void testChangingParameters() {
...
@@ -748,7 +748,7 @@ void testChangingParameters() {
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
}
}
void
testParallelComputation
(
bool
useCutoff
)
{
void
testParallelComputation
(
NonbondedForce
::
NonbondedMethod
method
)
{
System
system
;
System
system
;
const
int
numParticles
=
200
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
...
@@ -756,9 +756,9 @@ void testParallelComputation(bool useCutoff) {
...
@@ -756,9 +756,9 @@ void testParallelComputation(bool useCutoff) {
NonbondedForce
*
force
=
new
NonbondedForce
();
NonbondedForce
*
force
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
addParticle
(
i
%
2
-
0.5
,
0.5
,
1.0
);
force
->
addParticle
(
i
%
2
-
0.5
,
0.5
,
1.0
);
if
(
useCutoff
)
force
->
setNonbondedMethod
(
method
);
force
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
system
.
addForce
(
force
);
system
.
addForce
(
force
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
5
,
0
,
0
),
Vec3
(
0
,
5
,
0
),
Vec3
(
0
,
0
,
5
));
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
positions
(
numParticles
);
...
@@ -877,8 +877,9 @@ int main(int argc, char* argv[]) {
...
@@ -877,8 +877,9 @@ int main(int argc, char* argv[]) {
//testBlockInteractions(true);
//testBlockInteractions(true);
testDispersionCorrection
();
testDispersionCorrection
();
testChangingParameters
();
testChangingParameters
();
testParallelComputation
(
false
);
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
true
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
}
}
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
f1991b3c
...
@@ -1492,7 +1492,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1492,7 +1492,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
else
dispersionCoefficient
=
0.0
;
dispersionCoefficient
=
0.0
;
alpha
=
0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cl
.
getContextIndex
()
==
0
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
// Compute the Ewald parameters.
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
int
kmaxx
,
kmaxy
,
kmaxz
;
...
@@ -1500,23 +1500,25 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1500,23 +1500,25 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
if
(
cl
.
getContextIndex
()
==
0
)
{
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
// Create the reciprocal space kernels.
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
replacements
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
replacements
[
"KMAX_X"
]
=
cl
.
intToString
(
kmaxx
);
replacements
[
"KMAX_X"
]
=
cl
.
intToString
(
kmaxx
);
replacements
[
"KMAX_Y"
]
=
cl
.
intToString
(
kmaxy
);
replacements
[
"KMAX_Y"
]
=
cl
.
intToString
(
kmaxy
);
replacements
[
"KMAX_Z"
]
=
cl
.
intToString
(
kmaxz
);
replacements
[
"KMAX_Z"
]
=
cl
.
intToString
(
kmaxz
);
replacements
[
"EXP_COEFFICIENT"
]
=
cl
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
replacements
[
"EXP_COEFFICIENT"
]
=
cl
.
doubleToString
(
-
1.0
/
(
4.0
*
alpha
*
alpha
));
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
ewald
,
replacements
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
ewald
,
replacements
);
ewaldSumsKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldCosSinSums"
);
ewaldSumsKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldCosSinSums"
);
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double2
)
:
sizeof
(
mm_float2
));
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cl
.
getContextIndex
()
==
0
)
{
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
// Compute the PME parameters.
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
@@ -1527,119 +1529,121 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1527,119 +1529,121 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cl
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cl
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
if
(
cl
.
getContextIndex
()
==
0
)
{
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"PME_ORDER"
]
=
cl
.
intToString
(
PmeOrder
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
numParticles
);
pmeDefines
[
"GRID_SIZE_X"
]
=
cl
.
intToString
(
gridSizeX
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
cl
.
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_Y"
]
=
cl
.
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_X"
]
=
cl
.
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
cl
.
intToString
(
gridSizeZ
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
cl
.
intToString
(
gridSizeY
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
cl
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
pmeDefines
[
"GRID_SIZE_Z"
]
=
cl
.
intToString
(
gridSizeZ
);
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
cl
.
doubleToString
(
sqrt
(
ONE_4PI_EPS0
));
if
(
deviceIsCpu
)
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
pmeDefines
[
"DEVICE_IS_CPU"
]
=
"1"
;
if
(
deviceIsCpu
)
if
(
cl
.
getPlatformData
().
useCpuPme
)
{
pmeDefines
[
"DEVICE_IS_CPU"
]
=
"1"
;
// Create the CPU PME kernel.
if
(
cl
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cl
.
getPlatformData
().
context
);
try
{
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cl
.
getPlatformData
().
context
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
cl
::
Kernel
addForcesKernel
=
cl
::
Kernel
(
program
,
"addForces"
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
pmeio
=
new
PmeIO
(
cl
,
addForcesKernel
);
cl
::
Kernel
addForcesKernel
=
cl
::
Kernel
(
program
,
"addForces"
);
cl
.
addPreComputation
(
new
PmePreComputation
(
cl
,
cpuPme
,
*
pmeio
));
pmeio
=
new
PmeIO
(
cl
,
addForcesKernel
);
cl
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
cl
.
addPreComputation
(
new
PmePreComputation
(
cl
,
cpuPme
,
*
pmeio
));
}
cl
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
}
if
(
pmeio
==
NULL
)
{
// Create required data structures.
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
}
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
catch
(
OpenMMException
&
ex
)
{
{
// The CPU PME plugin isn't available.
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
}
if
(
cl
.
getUseDoublePrecision
())
{
}
if
(
dim
==
0
)
if
(
pmeio
==
NULL
)
{
pmeBsplineModuliX
->
upload
(
moduli
);
// Create required data structures.
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
int
elementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
else
pmeGrid
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
pmeBsplineModuliZ
->
upload
(
moduli
);
cl
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeGrid2
=
new
OpenCLArray
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid2"
);
pmeBsplineModuliX
=
new
OpenCLArray
(
cl
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
(
cl
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
(
cl
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
}
else
{
vector
<
float
>
modulif
(
ndata
);
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
{
if
(
dim
==
0
)
if
(
moduli
[
i
]
<
1.0e-7
)
pmeBsplineModuliX
->
upload
(
modulif
);
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
else
if
(
dim
==
1
)
}
pmeBsplineModuliY
->
upload
(
modulif
);
if
(
cl
.
getUseDoublePrecision
())
{
else
if
(
dim
==
0
)
pmeBsplineModuliZ
->
upload
(
modulif
);
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
}
}
}
}
...
...
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
View file @
f1991b3c
...
@@ -751,7 +751,7 @@ void testChangingParameters() {
...
@@ -751,7 +751,7 @@ void testChangingParameters() {
ASSERT_EQUAL_TOL
(
clState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
clState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
}
}
void
testParallelComputation
(
bool
useCutoff
)
{
void
testParallelComputation
(
NonbondedForce
::
NonbondedMethod
method
)
{
System
system
;
System
system
;
const
int
numParticles
=
200
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
...
@@ -759,9 +759,9 @@ void testParallelComputation(bool useCutoff) {
...
@@ -759,9 +759,9 @@ void testParallelComputation(bool useCutoff) {
NonbondedForce
*
force
=
new
NonbondedForce
();
NonbondedForce
*
force
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
addParticle
(
i
%
2
-
0.5
,
0.5
,
1.0
);
force
->
addParticle
(
i
%
2
-
0.5
,
0.5
,
1.0
);
if
(
useCutoff
)
force
->
setNonbondedMethod
(
method
);
force
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
system
.
addForce
(
force
);
system
.
addForce
(
force
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
5
,
0
,
0
),
Vec3
(
0
,
5
,
0
),
Vec3
(
0
,
0
,
5
));
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
positions
(
numParticles
);
...
@@ -880,8 +880,9 @@ int main(int argc, char* argv[]) {
...
@@ -880,8 +880,9 @@ int main(int argc, char* argv[]) {
// testBlockInteractions(true);
// testBlockInteractions(true);
testDispersionCorrection
();
testDispersionCorrection
();
testChangingParameters
();
testChangingParameters
();
testParallelComputation
(
false
);
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
true
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment