Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
50eb44c5
Commit
50eb44c5
authored
Jul 30, 2013
by
peastman
Browse files
Eliminated some #defines to improve the efficiency of PTX caching
parent
00f51540
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
51 additions
and
56 deletions
+51
-56
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+21
-26
platforms/cuda/src/kernels/andersenThermostat.cu
platforms/cuda/src/kernels/andersenThermostat.cu
+2
-2
platforms/cuda/src/kernels/brownian.cu
platforms/cuda/src/kernels/brownian.cu
+6
-6
platforms/cuda/src/kernels/constraints.cu
platforms/cuda/src/kernels/constraints.cu
+2
-2
platforms/cuda/src/kernels/langevin.cu
platforms/cuda/src/kernels/langevin.cu
+10
-10
platforms/cuda/src/kernels/verlet.cu
platforms/cuda/src/kernels/verlet.cu
+10
-10
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
50eb44c5
...
@@ -388,7 +388,6 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
...
@@ -388,7 +388,6 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if
(
!
hasInitializedKernel
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
hasInitializedKernel
=
true
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
constraints
,
defines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
constraints
,
defines
);
applyDeltasKernel
=
cu
.
getKernel
(
module
,
"applyPositionDeltas"
);
applyDeltasKernel
=
cu
.
getKernel
(
module
,
"applyPositionDeltas"
);
}
}
...
@@ -396,7 +395,8 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
...
@@ -396,7 +395,8 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
cu
.
clearBuffer
(
integration
.
getPosDelta
());
cu
.
clearBuffer
(
integration
.
getPosDelta
());
integration
.
applyConstraints
(
tol
);
integration
.
applyConstraints
(
tol
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getIntegrationUtilities
().
getPosDelta
().
getDevicePointer
()};
int
numAtoms
=
cu
.
getNumAtoms
();
void
*
args
[]
=
{
&
numAtoms
,
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getIntegrationUtilities
().
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
applyDeltasKernel
,
args
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
applyDeltasKernel
,
args
,
cu
.
getNumAtoms
());
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
}
}
...
@@ -4156,8 +4156,6 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
...
@@ -4156,8 +4156,6 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
verlet
,
defines
,
""
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
verlet
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVerletPart1"
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVerletPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVerletPart2"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVerletPart2"
);
...
@@ -4168,6 +4166,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
...
@@ -4168,6 +4166,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
double
dt
=
integrator
.
getStepSize
();
if
(
dt
!=
prevStepSize
)
{
if
(
dt
!=
prevStepSize
)
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
...
@@ -4186,7 +4185,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
...
@@ -4186,7 +4185,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
// Call the first integration kernel.
// Call the first integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args1
[]
=
{
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
void
*
args1
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
...
@@ -4196,7 +4195,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
...
@@ -4196,7 +4195,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
// Call the second integration kernel.
// Call the second integration kernel.
void
*
args2
[]
=
{
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
void
*
args2
[]
=
{
&
numAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -4223,8 +4222,6 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
...
@@ -4223,8 +4222,6 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
...
@@ -4236,6 +4233,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
...
@@ -4236,6 +4233,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
double
temperature
=
integrator
.
getTemperature
();
double
temperature
=
integrator
.
getTemperature
();
double
friction
=
integrator
.
getFriction
();
double
friction
=
integrator
.
getFriction
();
double
stepSize
=
integrator
.
getStepSize
();
double
stepSize
=
integrator
.
getStepSize
();
...
@@ -4273,7 +4271,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
...
@@ -4273,7 +4271,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
// Call the first integration kernel.
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
void
*
args1
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
...
@@ -4284,7 +4282,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
...
@@ -4284,7 +4282,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
// Call the second integration kernel.
// Call the second integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args2
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
integration
.
getPosDelta
().
getDevicePointer
(),
void
*
args2
[]
=
{
&
numAtoms
,
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -4308,8 +4306,6 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
...
@@ -4308,8 +4306,6 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
brownian
,
defines
,
""
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
brownian
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateBrownianPart1"
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateBrownianPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateBrownianPart2"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateBrownianPart2"
);
...
@@ -4320,6 +4316,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
...
@@ -4320,6 +4316,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
double
temperature
=
integrator
.
getTemperature
();
double
temperature
=
integrator
.
getTemperature
();
double
friction
=
integrator
.
getFriction
();
double
friction
=
integrator
.
getFriction
();
double
stepSize
=
integrator
.
getStepSize
();
double
stepSize
=
integrator
.
getStepSize
();
...
@@ -4334,7 +4331,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
...
@@ -4334,7 +4331,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
// Call the first integration kernel.
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
useDouble
?
(
void
*
)
&
tauDt
:
(
void
*
)
&
tauDtFloat
,
void
*
args1
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
useDouble
?
(
void
*
)
&
tauDt
:
(
void
*
)
&
tauDtFloat
,
useDouble
?
(
void
*
)
&
noise
:
(
void
*
)
&
noiseFloat
,
useDouble
?
(
void
*
)
&
noise
:
(
void
*
)
&
noiseFloat
,
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
...
@@ -4347,7 +4344,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
...
@@ -4347,7 +4344,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
// Call the second integration kernel.
// Call the second integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args2
[]
=
{
useDouble
?
(
void
*
)
&
stepSize
:
(
void
*
)
&
stepSizeFloat
,
void
*
args2
[]
=
{
&
numAtoms
,
useDouble
?
(
void
*
)
&
stepSize
:
(
void
*
)
&
stepSizeFloat
,
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -4370,8 +4367,6 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
...
@@ -4370,8 +4367,6 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
verlet
,
defines
,
""
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
verlet
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVerletPart1"
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVerletPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVerletPart2"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVerletPart2"
);
...
@@ -4383,6 +4378,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
...
@@ -4383,6 +4378,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
// Select the step size to use.
// Select the step size to use.
...
@@ -4391,7 +4387,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
...
@@ -4391,7 +4387,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
double
tol
=
integrator
.
getErrorTolerance
();
double
tol
=
integrator
.
getErrorTolerance
();
float
tolFloat
=
(
float
)
tol
;
float
tolFloat
=
(
float
)
tol
;
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
void
*
argsSelect
[]
=
{
useDouble
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
void
*
argsSelect
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
useDouble
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
useDouble
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
useDouble
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
()};
...
@@ -4401,7 +4397,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
...
@@ -4401,7 +4397,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
// Call the first integration kernel.
// Call the first integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args1
[]
=
{
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
void
*
args1
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
...
@@ -4411,7 +4407,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
...
@@ -4411,7 +4407,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
// Call the second integration kernel.
// Call the second integration kernel.
void
*
args2
[]
=
{
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
void
*
args2
[]
=
{
&
numAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -4456,8 +4452,6 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
...
@@ -4456,8 +4452,6 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
...
@@ -4471,6 +4465,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
...
@@ -4471,6 +4465,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
// Select the step size to use.
// Select the step size to use.
...
@@ -4483,7 +4478,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
...
@@ -4483,7 +4478,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
double
kT
=
BOLTZ
*
integrator
.
getTemperature
();
double
kT
=
BOLTZ
*
integrator
.
getTemperature
();
float
kTFloat
=
(
float
)
kT
;
float
kTFloat
=
(
float
)
kT
;
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
void
*
argsSelect
[]
=
{
useDouble
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
void
*
argsSelect
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
useDouble
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
useDouble
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
useDouble
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
useDouble
?
(
void
*
)
&
tau
:
(
void
*
)
&
tauFloat
,
useDouble
?
(
void
*
)
&
tau
:
(
void
*
)
&
tauFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTFloat
,
...
@@ -4495,7 +4490,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
...
@@ -4495,7 +4490,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
// Call the first integration kernel.
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
void
*
args1
[]
=
{
&
numAtoms
,
&
paddedNumAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
...
@@ -4506,7 +4501,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
...
@@ -4506,7 +4501,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
// Call the second integration kernel.
// Call the second integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args2
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
integration
.
getPosDelta
().
getDevicePointer
(),
void
*
args2
[]
=
{
&
numAtoms
,
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -5369,7 +5364,6 @@ void CudaApplyAndersenThermostatKernel::initialize(const System& system, const A
...
@@ -5369,7 +5364,6 @@ void CudaApplyAndersenThermostatKernel::initialize(const System& system, const A
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
randomSeed
=
thermostat
.
getRandomNumberSeed
();
randomSeed
=
thermostat
.
getRandomNumberSeed
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
andersenThermostat
,
defines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
andersenThermostat
,
defines
);
kernel
=
cu
.
getKernel
(
module
,
"applyAndersenThermostat"
);
kernel
=
cu
.
getKernel
(
module
,
"applyAndersenThermostat"
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
randomSeed
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
randomSeed
);
...
@@ -5391,7 +5385,8 @@ void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
...
@@ -5391,7 +5385,8 @@ void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
float
frequency
=
(
float
)
context
.
getParameter
(
AndersenThermostat
::
CollisionFrequency
());
float
frequency
=
(
float
)
context
.
getParameter
(
AndersenThermostat
::
CollisionFrequency
());
float
kT
=
(
float
)
(
BOLTZ
*
context
.
getParameter
(
AndersenThermostat
::
Temperature
()));
float
kT
=
(
float
)
(
BOLTZ
*
context
.
getParameter
(
AndersenThermostat
::
Temperature
()));
int
randomIndex
=
cu
.
getIntegrationUtilities
().
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
int
randomIndex
=
cu
.
getIntegrationUtilities
().
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args
[]
=
{
&
frequency
,
&
kT
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
int
numAtoms
=
cu
.
getNumAtoms
();
void
*
args
[]
=
{
&
numAtoms
,
&
frequency
,
&
kT
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
(),
&
randomIndex
,
&
atomGroups
->
getDevicePointer
()};
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
(),
&
randomIndex
,
&
atomGroups
->
getDevicePointer
()};
cu
.
executeKernel
(
kernel
,
args
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
kernel
,
args
,
cu
.
getNumAtoms
());
}
}
...
...
platforms/cuda/src/kernels/andersenThermostat.cu
View file @
50eb44c5
...
@@ -2,11 +2,11 @@
...
@@ -2,11 +2,11 @@
* Apply the Andersen thermostat to adjust particle velocities.
* Apply the Andersen thermostat to adjust particle velocities.
*/
*/
extern
"C"
__global__
void
applyAndersenThermostat
(
float
collisionFrequency
,
float
kT
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
stepSize
,
const
float4
*
__restrict__
random
,
extern
"C"
__global__
void
applyAndersenThermostat
(
int
numAtoms
,
float
collisionFrequency
,
float
kT
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
stepSize
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
,
const
int
*
__restrict__
atomGroups
)
{
unsigned
int
randomIndex
,
const
int
*
__restrict__
atomGroups
)
{
float
collisionProbability
=
1.0
f
-
expf
(
-
(
float
)
(
collisionFrequency
*
stepSize
[
0
].
y
));
float
collisionProbability
=
1.0
f
-
expf
(
-
(
float
)
(
collisionFrequency
*
stepSize
[
0
].
y
));
float
randomRange
=
erff
(
collisionProbability
/
sqrtf
(
2.0
f
));
float
randomRange
=
erff
(
collisionProbability
/
sqrtf
(
2.0
f
));
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
float4
selectRand
=
random
[
randomIndex
+
atomGroups
[
index
]];
float4
selectRand
=
random
[
randomIndex
+
atomGroups
[
index
]];
float4
velRand
=
random
[
randomIndex
+
index
];
float4
velRand
=
random
[
randomIndex
+
index
];
...
...
platforms/cuda/src/kernels/brownian.cu
View file @
50eb44c5
...
@@ -2,16 +2,16 @@
...
@@ -2,16 +2,16 @@
* Perform the first step of Brownian integration.
* Perform the first step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart1
(
mixed
tauDeltaT
,
mixed
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
extern
"C"
__global__
void
integrateBrownianPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
tauDeltaT
,
mixed
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
const
mixed4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
mixed4
*
__restrict__
posDelta
,
const
mixed4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
mixed
fscale
=
tauDeltaT
/
(
mixed
)
0x100000000
;
const
mixed
fscale
=
tauDeltaT
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
if
(
invMass
!=
0
)
{
if
(
invMass
!=
0
)
{
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
paddedNumAtoms
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
posDelta
[
index
].
z
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
z
;
posDelta
[
index
].
z
=
fscale
*
invMass
*
force
[
index
+
paddedNumAtoms
*
2
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
z
;
}
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
...
@@ -21,9 +21,9 @@ extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAm
...
@@ -21,9 +21,9 @@ extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAm
* Perform the second step of Brownian integration.
* Perform the second step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart2
(
mixed
deltaT
,
real4
*
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
integrateBrownianPart2
(
int
numAtoms
,
mixed
deltaT
,
real4
*
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
const
mixed
oneOverDeltaT
=
RECIP
(
deltaT
);
const
mixed
oneOverDeltaT
=
RECIP
(
deltaT
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
if
(
velm
[
index
].
w
!=
0
)
{
if
(
velm
[
index
].
w
!=
0
)
{
mixed4
delta
=
posDelta
[
index
];
mixed4
delta
=
posDelta
[
index
];
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
...
...
platforms/cuda/src/kernels/constraints.cu
View file @
50eb44c5
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
applyPositionDeltas
(
int
numAtoms
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
index
];
real4
pos1
=
posq
[
index
];
real4
pos2
=
posqCorrection
[
index
];
real4
pos2
=
posqCorrection
[
index
];
...
...
platforms/cuda/src/kernels/langevin.cu
View file @
50eb44c5
...
@@ -4,7 +4,7 @@ enum {VelScale, ForceScale, NoiseScale, MaxParams};
...
@@ -4,7 +4,7 @@ enum {VelScale, ForceScale, NoiseScale, MaxParams};
* Perform the first step of Langevin integration.
* Perform the first step of Langevin integration.
*/
*/
extern
"C"
__global__
void
integrateLangevinPart1
(
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
integrateLangevinPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
const
mixed
*
__restrict__
paramBuffer
,
const
mixed2
*
__restrict__
dt
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
const
mixed
*
__restrict__
paramBuffer
,
const
mixed2
*
__restrict__
dt
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
mixed
vscale
=
paramBuffer
[
VelScale
];
mixed
vscale
=
paramBuffer
[
VelScale
];
mixed
fscale
=
paramBuffer
[
ForceScale
]
/
(
mixed
)
0x100000000
;
mixed
fscale
=
paramBuffer
[
ForceScale
]
/
(
mixed
)
0x100000000
;
...
@@ -12,13 +12,13 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
...
@@ -12,13 +12,13 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
randomIndex
+=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0
)
{
if
(
velocity
.
w
!=
0
)
{
mixed
sqrtInvMass
=
SQRT
(
velocity
.
w
);
mixed
sqrtInvMass
=
SQRT
(
velocity
.
w
);
velocity
.
x
=
vscale
*
velocity
.
x
+
fscale
*
velocity
.
w
*
force
[
index
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
x
;
velocity
.
x
=
vscale
*
velocity
.
x
+
fscale
*
velocity
.
w
*
force
[
index
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
x
;
velocity
.
y
=
vscale
*
velocity
.
y
+
fscale
*
velocity
.
w
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
y
;
velocity
.
y
=
vscale
*
velocity
.
y
+
fscale
*
velocity
.
w
*
force
[
index
+
paddedNumAtoms
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
y
;
velocity
.
z
=
vscale
*
velocity
.
z
+
fscale
*
velocity
.
w
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
z
;
velocity
.
z
=
vscale
*
velocity
.
z
+
fscale
*
velocity
.
w
*
force
[
index
+
paddedNumAtoms
*
2
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
z
;
velm
[
index
]
=
velocity
;
velm
[
index
]
=
velocity
;
posDelta
[
index
]
=
make_mixed4
(
stepSize
*
velocity
.
x
,
stepSize
*
velocity
.
y
,
stepSize
*
velocity
.
z
,
0
);
posDelta
[
index
]
=
make_mixed4
(
stepSize
*
velocity
.
x
,
stepSize
*
velocity
.
y
,
stepSize
*
velocity
.
z
,
0
);
}
}
...
@@ -31,7 +31,7 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
...
@@ -31,7 +31,7 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
* Perform the second step of Langevin integration.
* Perform the second step of Langevin integration.
*/
*/
extern
"C"
__global__
void
integrateLangevinPart2
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
extern
"C"
__global__
void
integrateLangevinPart2
(
int
numAtoms
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
#if __CUDA_ARCH__ >= 130
#if __CUDA_ARCH__ >= 130
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
#else
#else
...
@@ -39,7 +39,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
...
@@ -39,7 +39,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
#endif
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed4
vel
=
velm
[
index
];
mixed4
vel
=
velm
[
index
];
if
(
vel
.
w
!=
0
)
{
if
(
vel
.
w
!=
0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -78,7 +78,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
...
@@ -78,7 +78,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
* Select the step size to use for the next step.
* Select the step size to use for the next step.
*/
*/
extern
"C"
__global__
void
selectLangevinStepSize
(
mixed
maxStepSize
,
mixed
errorTol
,
mixed
tau
,
mixed
kT
,
mixed2
*
__restrict__
dt
,
extern
"C"
__global__
void
selectLangevinStepSize
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
maxStepSize
,
mixed
errorTol
,
mixed
tau
,
mixed
kT
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed
*
__restrict__
paramBuffer
)
{
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed
*
__restrict__
paramBuffer
)
{
// Calculate the error.
// Calculate the error.
...
@@ -87,8 +87,8 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
...
@@ -87,8 +87,8 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
mixed
err
=
0
;
mixed
err
=
0
;
unsigned
int
index
=
threadIdx
.
x
;
unsigned
int
index
=
threadIdx
.
x
;
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
paddedNumAtoms
],
scale
*
force
[
index
+
paddedNumAtoms
*
2
]);
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
...
@@ -106,7 +106,7 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
...
@@ -106,7 +106,7 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
if
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
==
0
)
{
if
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
==
0
)
{
// Select the new step size.
// Select the new step size.
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
NUM_ATOMS
*
3
));
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
numAtoms
*
3
));
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
oldStepSize
=
dt
[
0
].
y
;
mixed
oldStepSize
=
dt
[
0
].
y
;
if
(
oldStepSize
>
0.0
f
)
if
(
oldStepSize
>
0.0
f
)
...
...
platforms/cuda/src/kernels/verlet.cu
View file @
50eb44c5
...
@@ -2,13 +2,13 @@
...
@@ -2,13 +2,13 @@
* Perform the first step of Verlet integration.
* Perform the first step of Verlet integration.
*/
*/
extern
"C"
__global__
void
integrateVerletPart1
(
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
integrateVerletPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
)
{
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
)
{
const
mixed2
stepSize
=
dt
[
0
];
const
mixed2
stepSize
=
dt
[
0
];
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
dtVel
/
(
mixed
)
0x100000000
;
const
mixed
scale
=
dtVel
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -19,8 +19,8 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
...
@@ -19,8 +19,8 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
real4
pos
=
posq
[
index
];
real4
pos
=
posq
[
index
];
#endif
#endif
velocity
.
x
+=
scale
*
force
[
index
]
*
velocity
.
w
;
velocity
.
x
+=
scale
*
force
[
index
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
index
+
paddedNumAtoms
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
index
+
paddedNumAtoms
*
2
]
*
velocity
.
w
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
...
@@ -34,7 +34,7 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
...
@@ -34,7 +34,7 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
* Perform the second step of Verlet integration.
* Perform the second step of Verlet integration.
*/
*/
extern
"C"
__global__
void
integrateVerletPart2
(
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
integrateVerletPart2
(
int
numAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
mixed2
stepSize
=
dt
[
0
];
mixed2
stepSize
=
dt
[
0
];
#if __CUDA_ARCH__ >= 130
#if __CUDA_ARCH__ >= 130
...
@@ -46,7 +46,7 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
...
@@ -46,7 +46,7 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
dt
[
0
].
x
=
stepSize
.
y
;
for
(;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -80,14 +80,14 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
...
@@ -80,14 +80,14 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
* Select the step size to use for the next step.
* Select the step size to use for the next step.
*/
*/
extern
"C"
__global__
void
selectVerletStepSize
(
mixed
maxStepSize
,
mixed
errorTol
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
)
{
extern
"C"
__global__
void
selectVerletStepSize
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
maxStepSize
,
mixed
errorTol
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
)
{
// Calculate the error.
// Calculate the error.
extern
__shared__
mixed
error
[];
extern
__shared__
mixed
error
[];
mixed
err
=
0.0
f
;
mixed
err
=
0.0
f
;
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
for
(
int
index
=
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
paddedNumAtoms
],
scale
*
force
[
index
+
paddedNumAtoms
*
2
]);
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
}
}
...
@@ -102,7 +102,7 @@ extern "C" __global__ void selectVerletStepSize(mixed maxStepSize, mixed errorTo
...
@@ -102,7 +102,7 @@ extern "C" __global__ void selectVerletStepSize(mixed maxStepSize, mixed errorTo
__syncthreads
();
__syncthreads
();
}
}
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
NUM_ATOMS
*
3
));
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
numAtoms
*
3
));
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
oldStepSize
=
dt
[
0
].
y
;
mixed
oldStepSize
=
dt
[
0
].
y
;
if
(
oldStepSize
>
0.0
f
)
if
(
oldStepSize
>
0.0
f
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment