Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3862202e
"plugins/drude/openmmapi/vscode:/vscode.git/clone" did not exist on "80e1b66f0a4daf8fff8bbe6f2f6186e6c42cb8f5"
Commit
3862202e
authored
Jul 12, 2013
by
Justin MacCallum
Browse files
Merge branch 'upstream' into fork
parents
e1a4e015
73882ac5
Changes
249
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
351 additions
and
171 deletions
+351
-171
platforms/cuda/src/CudaExpressionUtilities.cpp
platforms/cuda/src/CudaExpressionUtilities.cpp
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+227
-135
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+3
-1
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+14
-3
platforms/cuda/src/kernels/customIntegrator.cu
platforms/cuda/src/kernels/customIntegrator.cu
+2
-2
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+3
-5
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+9
-9
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+13
-3
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
+1
-1
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
+1
-1
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
+1
-1
platforms/cuda/tests/TestCudaCMMotionRemover.cpp
platforms/cuda/tests/TestCudaCMMotionRemover.cpp
+1
-1
platforms/cuda/tests/TestCudaCustomBondForce.cpp
platforms/cuda/tests/TestCudaCustomBondForce.cpp
+1
-1
platforms/cuda/tests/TestCudaCustomExternalForce.cpp
platforms/cuda/tests/TestCudaCustomExternalForce.cpp
+1
-1
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
+68
-1
platforms/cuda/tests/TestCudaEwald.cpp
platforms/cuda/tests/TestCudaEwald.cpp
+1
-1
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
+1
-1
platforms/cuda/tests/TestCudaHarmonicAngleForce.cpp
platforms/cuda/tests/TestCudaHarmonicAngleForce.cpp
+1
-1
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
+1
-1
platforms/cuda/tests/TestCudaMonteCarloBarostat.cpp
platforms/cuda/tests/TestCudaMonteCarloBarostat.cpp
+1
-1
No files found.
platforms/cuda/src/CudaExpressionUtilities.cpp
View file @
3862202e
...
...
@@ -181,7 +181,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"ASIN("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ACOS
:
out
<<
"AC
S
O("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"ACO
S
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
case
Operation
::
ATAN
:
out
<<
"ATAN("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
3862202e
...
...
@@ -44,8 +44,8 @@
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMUtilities.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include <cmath>
#include <set>
...
...
@@ -84,10 +84,12 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
void
CudaCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cu
.
setAsCurrent
();
cu
.
clearAutoclearBuffers
();
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cu
.
setComputeForceCount
(
cu
.
getComputeForceCount
()
+
1
);
cu
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
nb
.
prepareInteractions
();
}
...
...
@@ -96,8 +98,10 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
if
((
groups
&
(
1
<<
cu
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cu
.
getNonbondedUtilities
().
computeInteractions
();
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
double
sum
=
0.0
;
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
if
(
includeEnergy
)
{
CudaArray
&
energyArray
=
cu
.
getEnergyBuffer
();
if
(
cu
.
getUseDoublePrecision
())
{
...
...
@@ -1330,6 +1334,58 @@ private:
const
NonbondedForce
&
force
;
};
class
CudaCalcNonbondedForceKernel
::
PmeIO
:
public
CalcPmeReciprocalForceKernel
::
IO
{
public:
PmeIO
(
CudaContext
&
cu
,
CUfunction
addForcesKernel
)
:
cu
(
cu
),
addForcesKernel
(
addForcesKernel
),
forceTemp
(
NULL
)
{
forceTemp
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"PmeForce"
);
}
~
PmeIO
()
{
if
(
forceTemp
!=
NULL
)
delete
forceTemp
;
}
float
*
getPosq
()
{
cu
.
setAsCurrent
();
cu
.
getPosq
().
download
(
posq
);
return
(
float
*
)
&
posq
[
0
];
}
void
setForce
(
float
*
force
)
{
forceTemp
->
upload
(
force
);
void
*
args
[]
=
{
&
forceTemp
->
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
()};
cu
.
executeKernel
(
addForcesKernel
,
args
,
cu
.
getNumAtoms
());
}
private:
CudaContext
&
cu
;
vector
<
float4
>
posq
;
CudaArray
*
forceTemp
;
CUfunction
addForcesKernel
;
};
class
CudaCalcNonbondedForceKernel
::
PmePreComputation
:
public
CudaContext
::
ForcePreComputation
{
public:
PmePreComputation
(
CudaContext
&
cu
,
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
cu
(
cu
),
pme
(
pme
),
io
(
io
)
{
}
void
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
Vec3
boxSize
(
cu
.
getPeriodicBoxSize
().
x
,
cu
.
getPeriodicBoxSize
().
y
,
cu
.
getPeriodicBoxSize
().
z
);
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
beginComputation
(
io
,
boxSize
,
includeEnergy
);
}
private:
CudaContext
&
cu
;
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
class
CudaCalcNonbondedForceKernel
::
PmePostComputation
:
public
CudaContext
::
ForcePostComputation
{
public:
PmePostComputation
(
Kernel
&
pme
,
CalcPmeReciprocalForceKernel
::
IO
&
io
)
:
pme
(
pme
),
io
(
io
)
{
}
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
return
pme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
private:
Kernel
pme
;
CalcPmeReciprocalForceKernel
::
IO
&
io
;
};
CudaCalcNonbondedForceKernel
::~
CudaCalcNonbondedForceKernel
()
{
cu
.
setAsCurrent
();
if
(
sigmaEpsilon
!=
NULL
)
...
...
@@ -1354,6 +1410,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete
pmeAtomGridIndex
;
if
(
sort
!=
NULL
)
delete
sort
;
if
(
pmeio
!=
NULL
)
delete
pmeio
;
if
(
hasInitializedFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftBackward
);
...
...
@@ -1457,7 +1515,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
else
dispersionCoefficient
=
0.0
;
alpha
=
0
;
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
&&
cu
.
getContextIndex
()
==
0
)
{
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
...
...
@@ -1465,7 +1523,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
// Create the reciprocal space kernels.
...
...
@@ -1484,7 +1542,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double2
)
:
sizeof
(
float2
));
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
&&
cu
.
getContextIndex
()
==
0
)
{
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
...
...
@@ -1497,7 +1555,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
)
;
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
pmeDefines
[
"PME_ORDER"
]
=
cu
.
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numParticles
);
pmeDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -1510,111 +1568,128 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if
(
cu
.
getUseDoublePrecision
())
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cu
.
getKernel
(
module
,
"gridInterpolateForce"
);
pmeEvalEnergyKernel
=
cu
.
getKernel
(
module
,
"gridEvaluateEnergy"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
// Create required data structures.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
elementSize
:
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
(
gridSizeZ
/
2
+
1
),
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
try
{
cpuPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSizeX
,
gridSizeY
,
gridSizeZ
,
numParticles
,
alpha
);
CUfunction
addForcesKernel
=
cu
.
getKernel
(
module
,
"addForces"
);
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
}
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
}
if
(
pmeio
==
NULL
)
{
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cu
.
getKernel
(
module
,
"gridInterpolateForce"
);
pmeEvalEnergyKernel
=
cu
.
getKernel
(
module
,
"gridEvaluateEnergy"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
// Create required data structures.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
elementSize
:
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
(
gridSizeZ
/
2
+
1
),
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fftForward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
cufftSetCompatibilityMode
(
fftForward
,
CUFFT_COMPATIBILITY_NATIVE
);
cufftSetCompatibilityMode
(
fftBackward
,
CUFFT_COMPATIBILITY_NATIVE
);
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
hasInitializedFFT
=
true
;
// Differentiate.
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
double
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
sc
*
sc
+
ss
*
ss
;
}
moduli
[
i
]
=
sc
*
sc
+
ss
*
ss
;
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
;
if
(
cu
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
;
if
(
cu
.
getUseDoublePrecision
())
{
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
else
{
vector
<
float
>
modulif
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
modulif
[
i
]
=
(
float
)
moduli
[
i
];
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
modulif
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
modulif
);
else
pmeBsplineModuliZ
->
upload
(
modulif
);
}
}
}
}
...
...
@@ -1654,13 +1729,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
double
CudaCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
void
*
sumsArgs
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldSumsKernel
,
sumsArgs
,
cosSinSums
->
getSize
());
void
*
forcesArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
if
(
directPmeGrid
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
...
...
@@ -4677,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Initialize the random number generator.
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
...
...
@@ -4783,10 +4842,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
)
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
usesVariable
(
expression
[
step
],
"uniform"
)
&&
!
usesVariable
(
expression
[
step
],
"gaussian"
))
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
&&
!
usesVariable
(
expression
[
step
],
"uniform"
))
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
merged
[
step
]
=
true
;
}
...
...
@@ -4805,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
...
...
@@ -4824,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += NUM_ATOMS;
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += NUM_ATOMS;
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
...
@@ -4856,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
...
@@ -4925,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
...
...
@@ -4967,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
()
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
...
...
@@ -5037,7 +5123,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Loop over computation steps in the integrator and execute them.
void
*
randomArgs
[]
=
{
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
int
maxUniformRandoms
=
uniformRandoms
->
getSize
();
void
*
randomArgs
[]
=
{
&
maxUniformRandoms
,
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
int
lastForceGroups
=
context
.
getLastForceGroups
();
...
...
@@ -5086,7 +5173,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
...
@@ -5101,7 +5190,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
clearBuffer
(
*
sumBuffer
);
...
...
@@ -5152,6 +5243,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
int
randomIndex
=
0
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
9
]
=
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
();
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
3862202e
...
...
@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
bool
useShuffle
=
(
context
.
getComputeCapability
()
>=
3.0
);
int
cudaVersion
;
cuDriverGetVersion
(
&
cudaVersion
);
bool
useShuffle
=
(
context
.
getComputeCapability
()
>=
3.0
&&
cudaVersion
>=
5050
);
// Part 1. Defines for on diagonal exclusion tiles
stringstream
loadLocal1
;
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
3862202e
...
...
@@ -87,12 +87,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaDeviceName
());
platformProperties
.
push_back
(
CudaUseBlockingSync
());
platformProperties
.
push_back
(
CudaPrecision
());
platformProperties
.
push_back
(
CudaUseCpuPme
());
platformProperties
.
push_back
(
CudaCompiler
());
platformProperties
.
push_back
(
CudaTempDirectory
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
...
@@ -141,13 +143,20 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaUseBlockingSync
())
:
properties
.
find
(
CudaUseBlockingSync
())
->
second
);
string
precisionPropValue
=
(
properties
.
find
(
CudaPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaPrecision
())
:
properties
.
find
(
CudaPrecision
())
->
second
);
string
cpuPmePropValue
=
(
properties
.
find
(
CudaUseCpuPme
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaUseCpuPme
())
:
properties
.
find
(
CudaUseCpuPme
())
->
second
);
const
string
&
compilerPropValue
=
(
properties
.
find
(
CudaCompiler
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaCompiler
())
:
properties
.
find
(
CudaCompiler
())
->
second
);
const
string
&
tempPropValue
=
(
properties
.
find
(
CudaTempDirectory
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaTempDirectory
())
:
properties
.
find
(
CudaTempDirectory
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
compilerPropValue
,
tempPropValue
));
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -155,8 +164,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete
data
;
}
CudaPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
...
...
@@ -185,10 +194,12 @@ CudaPlatform::PlatformData::PlatformData(const System& system, const string& dev
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaPrecision
()]
=
precisionProperty
;
propertyValues
[
CudaPlatform
::
CudaUseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaCompiler
()]
=
compilerProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
...
...
platforms/cuda/src/kernels/customIntegrator.cu
View file @
3862202e
...
...
@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4*
}
}
extern
"C"
__global__
void
generateRandomNumbers
(
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
extern
"C"
__global__
void
generateRandomNumbers
(
int
numValues
,
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
uint4
state
=
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
];
unsigned
int
carry
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numValues
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Generate three uniform random numbers.
state
.
x
=
state
.
x
*
69069
+
1
;
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
3862202e
...
...
@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
random
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussianBase
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
...
...
@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double
mass
=
1.0
/
velocity
.
w
;
if
(
velocity
.
w
!=
0.0
)
{
float4
gaussian
=
gaussian
Values
[
random
Index
]
;
float4
uniform
=
uniformValues
[
index
]
;
int
gaussian
Index
=
gaussian
Base
Index
;
int
uniform
Index
=
0
;
COMPUTE_STEP
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
3862202e
...
...
@@ -364,9 +364,9 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed
yaksYd
=
zaksZd
*
xaksXd
-
xaksZd
*
zaksXd
;
mixed
zaksYd
=
xaksZd
*
yaksXd
-
yaksZd
*
xaksXd
;
mixed
axlng
=
SQRT
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
mixed
aylng
=
SQRT
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
mixed
azlng
=
SQRT
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
mixed
axlng
=
sqrt
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
mixed
aylng
=
sqrt
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
mixed
azlng
=
sqrt
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
mixed
trns11
=
xaksXd
/
axlng
;
mixed
trns21
=
yaksXd
/
axlng
;
mixed
trns31
=
zaksXd
/
axlng
;
...
...
@@ -392,13 +392,13 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
// --- Step2 A2' ---
float
rc
=
0.5
f
*
params
.
y
;
mixed
rb
=
SQRT
(
params
.
x
*
params
.
x
-
rc
*
rc
);
mixed
rb
=
sqrt
(
params
.
x
*
params
.
x
-
rc
*
rc
);
mixed
ra
=
rb
*
(
m1
+
m2
)
*
invTotalMass
;
rb
-=
ra
;
mixed
sinphi
=
za1d
/
ra
;
mixed
cosphi
=
SQRT
(
1
-
sinphi
*
sinphi
);
mixed
cosphi
=
sqrt
(
1
-
sinphi
*
sinphi
);
mixed
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2
*
rc
*
cosphi
);
mixed
cospsi
=
SQRT
(
1
-
sinpsi
*
sinpsi
);
mixed
cospsi
=
sqrt
(
1
-
sinpsi
*
sinpsi
);
mixed
ya2d
=
ra
*
cosphi
;
mixed
xb2d
=
-
rc
*
cospsi
;
...
...
@@ -406,7 +406,7 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed
yc2d
=
-
rb
*
cosphi
+
rc
*
sinpsi
*
sinphi
;
mixed
xb2d2
=
xb2d
*
xb2d
;
mixed
hh2
=
4.0
f
*
xb2d2
+
(
yb2d
-
yc2d
)
*
(
yb2d
-
yc2d
)
+
(
zb1d
-
zc1d
)
*
(
zb1d
-
zc1d
);
mixed
deltx
=
2.0
f
*
xb2d
+
SQRT
(
4.0
f
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
mixed
deltx
=
2.0
f
*
xb2d
+
sqrt
(
4.0
f
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
xb2d
-=
deltx
*
0.5
f
;
// --- Step3 al,be,ga ---
...
...
@@ -416,11 +416,11 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed
gamma
=
xb0d
*
yb1d
-
xb1d
*
yb0d
+
xc0d
*
yc1d
-
xc1d
*
yc0d
;
mixed
al2be2
=
alpha
*
alpha
+
beta
*
beta
;
mixed
sintheta
=
(
alpha
*
gamma
-
beta
*
SQRT
(
al2be2
-
gamma
*
gamma
))
/
al2be2
;
mixed
sintheta
=
(
alpha
*
gamma
-
beta
*
sqrt
(
al2be2
-
gamma
*
gamma
))
/
al2be2
;
// --- Step4 A3' ---
mixed
costheta
=
SQRT
(
1
-
sintheta
*
sintheta
);
mixed
costheta
=
sqrt
(
1
-
sintheta
*
sintheta
);
mixed
xa3d
=
-
ya2d
*
sintheta
;
mixed
ya3d
=
ya2d
*
costheta
;
mixed
za3d
=
za1d
;
...
...
platforms/cuda/src/kernels/pme.cu
View file @
3862202e
...
...
@@ -266,8 +266,18 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
}
}
extern
"C"
__global__
void
addForces
(
const
real4
*
__restrict__
forces
,
unsigned
long
long
*
__restrict__
forceBuffers
)
{
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
f
=
forces
[
atom
];
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
x
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
y
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
}
}
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
View file @
3862202e
...
...
@@ -40,7 +40,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
View file @
3862202e
...
...
@@ -43,7 +43,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/BrownianIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
View file @
3862202e
...
...
@@ -40,7 +40,7 @@
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaCMMotionRemover.cpp
View file @
3862202e
...
...
@@ -42,7 +42,7 @@
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaCustomBondForce.cpp
View file @
3862202e
...
...
@@ -39,7 +39,7 @@
#include "openmm/CustomBondForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaCustomExternalForce.cpp
View file @
3862202e
...
...
@@ -39,7 +39,7 @@
#include "openmm/CustomExternalForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
View file @
3862202e
...
...
@@ -41,7 +41,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/CustomIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
@@ -651,6 +651,72 @@ void testRespa() {
}
}
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void
testMergedRandoms
()
{
const
int
numParticles
=
10
;
const
int
numSteps
=
10
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addPerDofVariable
(
"dofUniform1"
,
0
);
integrator
.
addPerDofVariable
(
"dofUniform2"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian1"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian2"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform1"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform2"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian1"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian2"
,
0
);
integrator
.
addComputePerDof
(
"dofUniform1"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofUniform2"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofGaussian1"
,
"gaussian"
);
integrator
.
addComputePerDof
(
"dofGaussian2"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalUniform1"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalUniform2"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalGaussian1"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalGaussian2"
,
"gaussian"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the random numbers are computed correctly.
vector
<
Vec3
>
values1
,
values2
;
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integrator
.
step
(
1
);
integrator
.
getPerDofVariable
(
0
,
values1
);
integrator
.
getPerDofVariable
(
1
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
}
integrator
.
getPerDofVariable
(
2
,
values1
);
integrator
.
getPerDofVariable
(
3
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
double
v1
=
integrator
.
getGlobalVariable
(
0
);
double
v2
=
integrator
.
getGlobalVariable
(
1
);
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
v1
=
integrator
.
getGlobalVariable
(
2
);
v2
=
integrator
.
getGlobalVariable
(
3
);
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
...
...
@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables
();
testForceGroups
();
testRespa
();
testMergedRandoms
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda/tests/TestCudaEwald.cpp
View file @
3862202e
...
...
@@ -42,7 +42,7 @@
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/internal/ContextImpl.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
View file @
3862202e
...
...
@@ -40,7 +40,7 @@
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include "openmm/NonbondedForce.h"
#include <iostream>
...
...
platforms/cuda/tests/TestCudaHarmonicAngleForce.cpp
View file @
3862202e
...
...
@@ -40,7 +40,7 @@
#include "openmm/HarmonicAngleForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
View file @
3862202e
...
...
@@ -40,7 +40,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
...
...
platforms/cuda/tests/TestCudaMonteCarloBarostat.cpp
View file @
3862202e
...
...
@@ -42,7 +42,7 @@
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include "
../src/SimTKUtilities/
SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
...
...
Prev
1
2
3
4
5
6
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment