Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8cda1b79
"docs-source/vscode:/vscode.git/clone" did not exist on "92dfd1ff4b3ea40c4e821b1dd2abce71f95ee267"
Commit
8cda1b79
authored
Oct 04, 2012
by
Peter Eastman
Browse files
Finished implementing mixed precision model
parent
9ad85ebd
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
159 additions
and
88 deletions
+159
-88
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+34
-24
platforms/cuda/src/kernels/brownian.cu
platforms/cuda/src/kernels/brownian.cu
+23
-10
platforms/cuda/src/kernels/constraints.cu
platforms/cuda/src/kernels/constraints.cu
+17
-6
platforms/cuda/src/kernels/customIntegrator.cu
platforms/cuda/src/kernels/customIntegrator.cu
+37
-10
platforms/cuda/src/kernels/customIntegratorGlobal.cu
platforms/cuda/src/kernels/customIntegratorGlobal.cu
+1
-1
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+34
-9
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+13
-28
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
8cda1b79
...
@@ -378,7 +378,8 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
...
@@ -378,7 +378,8 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
cu
.
clearBuffer
(
integration
.
getPosDelta
());
cu
.
clearBuffer
(
integration
.
getPosDelta
());
integration
.
applyConstraints
(
tol
);
integration
.
applyConstraints
(
tol
);
void
*
args
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getPosDelta
().
getDevicePointer
()};
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getIntegrationUtilities
().
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
applyDeltasKernel
,
args
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
applyDeltasKernel
,
args
,
cu
.
getNumAtoms
());
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
}
}
...
@@ -4144,12 +4145,13 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
...
@@ -4144,12 +4145,13 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
float
stepSizeFloat
=
(
float
)
stepSize
;
float
stepSizeFloat
=
(
float
)
stepSize
;
float
tauDtFloat
=
(
float
)
tauDt
;
float
tauDtFloat
=
(
float
)
tauDt
;
float
noiseFloat
=
(
float
)
noise
;
float
noiseFloat
=
(
float
)
noise
;
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
// Call the first integration kernel.
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
cu
.
getU
seDouble
Precision
()
?
(
void
*
)
&
tauDt
:
(
void
*
)
&
tauDtFloat
,
void
*
args1
[]
=
{
u
seDouble
?
(
void
*
)
&
tauDt
:
(
void
*
)
&
tauDtFloat
,
cu
.
getU
seDouble
Precision
()
?
(
void
*
)
&
noise
:
(
void
*
)
&
noiseFloat
,
u
seDouble
?
(
void
*
)
&
noise
:
(
void
*
)
&
noiseFloat
,
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
...
@@ -4160,8 +4162,9 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
...
@@ -4160,8 +4162,9 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
// Call the second integration kernel.
// Call the second integration kernel.
void
*
args2
[]
=
{
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
stepSize
:
(
void
*
)
&
stepSizeFloat
,
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
void
*
args2
[]
=
{
useDouble
?
(
void
*
)
&
stepSize
:
(
void
*
)
&
stepSizeFloat
,
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
integration
.
computeVirtualSites
();
...
@@ -4354,7 +4357,7 @@ public:
...
@@ -4354,7 +4357,7 @@ public:
return
;
return
;
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
{
if
(
deviceValuesAreCurrent
)
if
(
deviceValuesAreCurrent
)
perDofValues
.
getParameterValues
(
localPerDofValuesDouble
);
perDofValues
.
getParameterValues
(
localPerDofValuesDouble
);
vector
<
vector
<
double
>
>
swap
(
3
*
numAtoms
);
vector
<
vector
<
double
>
>
swap
(
3
*
numAtoms
);
...
@@ -4422,11 +4425,11 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
...
@@ -4422,11 +4425,11 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
sumBuffer
=
new
CudaArray
(
cu
,
3
*
system
.
getNumParticles
(),
elementSize
,
"sumBuffer"
);
sumBuffer
=
new
CudaArray
(
cu
,
3
*
system
.
getNumParticles
(),
elementSize
,
"sumBuffer"
);
energy
=
new
CudaArray
(
cu
,
1
,
e
lementSize
,
"energy"
);
energy
=
new
CudaArray
(
cu
,
1
,
cu
.
getEnergyBuffer
().
getE
lementSize
()
,
"energy"
);
perDofValues
=
new
CudaParameterSet
(
cu
,
integrator
.
getNumPerDofVariables
(),
3
*
system
.
getNumParticles
(),
"perDofVariables"
,
false
,
cu
.
getUseDoublePrecision
());
perDofValues
=
new
CudaParameterSet
(
cu
,
integrator
.
getNumPerDofVariables
(),
3
*
system
.
getNumParticles
(),
"perDofVariables"
,
false
,
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
);
cu
.
addReorderListener
(
new
ReorderListener
(
cu
,
*
perDofValues
,
localPerDofValuesFloat
,
localPerDofValuesDouble
,
deviceValuesAreCurrent
));
cu
.
addReorderListener
(
new
ReorderListener
(
cu
,
*
perDofValues
,
localPerDofValuesFloat
,
localPerDofValuesDouble
,
deviceValuesAreCurrent
));
prevStepSize
=
-
1.0
;
prevStepSize
=
-
1.0
;
SimTKOpenMMUtilities
::
setRandomNumberSeed
(
integrator
.
getRandomNumberSeed
());
SimTKOpenMMUtilities
::
setRandomNumberSeed
(
integrator
.
getRandomNumberSeed
());
...
@@ -4502,13 +4505,14 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4502,13 +4505,14 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numSteps
=
integrator
.
getNumComputations
();
int
numSteps
=
integrator
.
getNumComputations
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
if
(
!
hasInitializedKernels
)
{
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
hasInitializedKernels
=
true
;
// Initialize various data structures.
// Initialize various data structures.
const
map
<
string
,
double
>&
params
=
context
.
getParameters
();
const
map
<
string
,
double
>&
params
=
context
.
getParameters
();
if
(
cu
.
getU
seDouble
Precision
()
)
{
if
(
u
seDouble
)
{
contextParameterValues
=
CudaArray
::
create
<
double
>
(
cu
,
max
(
1
,
(
int
)
params
.
size
()),
"contextParameters"
);
contextParameterValues
=
CudaArray
::
create
<
double
>
(
cu
,
max
(
1
,
(
int
)
params
.
size
()),
"contextParameters"
);
contextValuesDouble
.
resize
(
contextParameterValues
->
getSize
());
contextValuesDouble
.
resize
(
contextParameterValues
->
getSize
());
for
(
map
<
string
,
double
>::
const_iterator
iter
=
params
.
begin
();
iter
!=
params
.
end
();
++
iter
)
{
for
(
map
<
string
,
double
>::
const_iterator
iter
=
params
.
begin
();
iter
!=
params
.
end
();
++
iter
)
{
...
@@ -4674,9 +4678,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4674,9 +4678,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
if
(
storePosAsDelta
[
j
])
if
(
storePosAsDelta
[
j
])
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(
posq[
index
]
));
\n
"
;
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(
loadPos(posq, posqCorrection,
index
)
));
\n
"
;
else
else
compute
<<
"
posq[
index
] =
convertFromDouble4(position);
\n
"
;
compute
<<
"
storePos(posq, posqCorrection,
index
,
convertFromDouble4(position)
)
;
\n
"
;
}
}
else
if
(
variable
[
j
]
==
"v"
)
else
if
(
variable
[
j
]
==
"v"
)
compute
<<
"velm[index] = convertFromDouble4(velocity);
\n
"
;
compute
<<
"velm[index] = convertFromDouble4(velocity);
\n
"
;
...
@@ -4712,6 +4716,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4712,6 +4716,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
requiredUniform
[
step
]
=
numUniform
;
requiredUniform
[
step
]
=
numUniform
;
vector
<
void
*>
args1
;
vector
<
void
*>
args1
;
args1
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getVelm
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getVelm
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
...
@@ -4749,7 +4754,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4749,7 +4754,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
throw
OpenMMException
(
"Unknown global variable: "
+
variable
[
step
]);
throw
OpenMMException
(
"Unknown global variable: "
+
variable
[
step
]);
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
kernel
=
cu
.
getKernel
(
module
,
"compute
Sum"
);
kernel
=
cu
.
getKernel
(
module
,
useDouble
?
"computeDoubleSum"
:
"computeFloat
Sum"
);
kernels
[
step
].
push_back
(
kernel
);
kernels
[
step
].
push_back
(
kernel
);
kernelArgs
[
step
].
push_back
(
args2
);
kernelArgs
[
step
].
push_back
(
args2
);
}
}
...
@@ -4782,6 +4787,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4782,6 +4787,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
kernels
[
step
].
push_back
(
kernel
);
kernels
[
step
].
push_back
(
kernel
);
vector
<
void
*>
args
;
vector
<
void
*>
args
;
args
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args
.
push_back
(
NULL
);
args
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
args
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
kernelArgs
[
step
].
push_back
(
args
);
kernelArgs
[
step
].
push_back
(
args
);
}
}
...
@@ -4792,13 +4798,13 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4792,13 +4798,13 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
cu
.
getEnergyBuffer
().
getSize
());
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
cu
.
getEnergyBuffer
().
getSize
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
sumEnergyKernel
=
cu
.
getKernel
(
module
,
"compute
Sum"
);
sumEnergyKernel
=
cu
.
getKernel
(
module
,
cu
.
getUseDoublePrecision
()
?
"computeDoubleSum"
:
"computeFloat
Sum"
);
}
}
// Make sure all values (variables, parameters, etc.) stored on the device are up to date.
// Make sure all values (variables, parameters, etc.) stored on the device are up to date.
if
(
!
deviceValuesAreCurrent
)
{
if
(
!
deviceValuesAreCurrent
)
{
if
(
cu
.
getU
seDouble
Precision
()
)
if
(
u
seDouble
)
perDofValues
->
setParameterValues
(
localPerDofValuesDouble
);
perDofValues
->
setParameterValues
(
localPerDofValuesDouble
);
else
else
perDofValues
->
setParameterValues
(
localPerDofValuesFloat
);
perDofValues
->
setParameterValues
(
localPerDofValuesFloat
);
...
@@ -4807,7 +4813,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4807,7 +4813,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
localValuesAreCurrent
=
false
;
localValuesAreCurrent
=
false
;
double
stepSize
=
integrator
.
getStepSize
();
double
stepSize
=
integrator
.
getStepSize
();
if
(
stepSize
!=
prevStepSize
)
{
if
(
stepSize
!=
prevStepSize
)
{
if
(
cu
.
getU
seDouble
Precision
()
)
{
if
(
u
seDouble
)
{
double
size
[]
=
{
0
,
stepSize
};
double
size
[]
=
{
0
,
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
integration
.
getStepSize
().
upload
(
size
);
}
}
...
@@ -4818,7 +4824,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4818,7 +4824,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
prevStepSize
=
stepSize
;
prevStepSize
=
stepSize
;
}
}
bool
paramsChanged
=
false
;
bool
paramsChanged
=
false
;
if
(
cu
.
getU
seDouble
Precision
()
)
{
if
(
u
seDouble
)
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesDouble
[
i
])
{
if
(
value
!=
contextValuesDouble
[
i
])
{
...
@@ -4844,6 +4850,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4844,6 +4850,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Loop over computation steps in the integrator and execute them.
// Loop over computation steps in the integrator and execute them.
void
*
randomArgs
[]
=
{
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
void
*
randomArgs
[]
=
{
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
context
.
getLastForceGroups
()
!=
forceGroup
[
i
]))
{
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
context
.
getLastForceGroups
()
!=
forceGroup
[
i
]))
{
// Recompute forces and/or energy. Figure out what is actually needed
// Recompute forces and/or energy. Figure out what is actually needed
...
@@ -4872,7 +4879,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4872,7 +4879,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
9
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
@@ -4886,7 +4894,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4886,7 +4894,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
9
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
@@ -4898,6 +4907,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4898,6 +4907,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ConstrainPositions
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ConstrainPositions
)
{
cu
.
getIntegrationUtilities
().
applyConstraints
(
integrator
.
getConstraintTolerance
());
cu
.
getIntegrationUtilities
().
applyConstraints
(
integrator
.
getConstraintTolerance
());
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
getIntegrationUtilities
().
computeVirtualSites
();
cu
.
getIntegrationUtilities
().
computeVirtualSites
();
}
}
...
@@ -4918,7 +4928,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -4918,7 +4928,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
if
(
!
modifiesParameters
)
if
(
!
modifiesParameters
)
return
;
return
;
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
{
contextParameterValues
->
download
(
contextValuesDouble
);
contextParameterValues
->
download
(
contextValuesDouble
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
...
@@ -4940,7 +4950,7 @@ void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vec
...
@@ -4940,7 +4950,7 @@ void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vec
values
.
resize
(
numGlobalVariables
);
values
.
resize
(
numGlobalVariables
);
if
(
numGlobalVariables
==
0
)
if
(
numGlobalVariables
==
0
)
return
;
return
;
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
globalValues
->
download
(
values
);
globalValues
->
download
(
values
);
else
{
else
{
vector
<
float
>
buffer
;
vector
<
float
>
buffer
;
...
@@ -4953,7 +4963,7 @@ void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vec
...
@@ -4953,7 +4963,7 @@ void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vec
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
if
(
numGlobalVariables
==
0
)
if
(
numGlobalVariables
==
0
)
return
;
return
;
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
globalValues
->
upload
(
values
);
globalValues
->
upload
(
values
);
else
{
else
{
vector
<
float
>
buffer
(
numGlobalVariables
);
vector
<
float
>
buffer
(
numGlobalVariables
);
...
@@ -4966,7 +4976,7 @@ void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, con
...
@@ -4966,7 +4976,7 @@ void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, con
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
values
.
resize
(
perDofValues
->
getNumObjects
()
/
3
);
values
.
resize
(
perDofValues
->
getNumObjects
()
/
3
);
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
{
if
(
!
localValuesAreCurrent
)
{
if
(
!
localValuesAreCurrent
)
{
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
localValuesAreCurrent
=
true
;
localValuesAreCurrent
=
true
;
...
@@ -4988,7 +4998,7 @@ void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int
...
@@ -4988,7 +4998,7 @@ void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int
void
CudaIntegrateCustomStepKernel
::
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
vector
<
Vec3
>&
values
)
{
void
CudaIntegrateCustomStepKernel
::
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
vector
<
Vec3
>&
values
)
{
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
{
if
(
!
localValuesAreCurrent
)
{
if
(
!
localValuesAreCurrent
)
{
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
localValuesAreCurrent
=
true
;
localValuesAreCurrent
=
true
;
...
...
platforms/cuda/src/kernels/brownian.cu
View file @
8cda1b79
...
@@ -2,12 +2,12 @@
...
@@ -2,12 +2,12 @@
* Perform the first step of Brownian integration.
* Perform the first step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart1
(
real
tauDeltaT
,
real
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
extern
"C"
__global__
void
integrateBrownianPart1
(
mixed
tauDeltaT
,
mixed
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
real
4
*
__restrict__
posDelta
,
const
real
4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
mixed
4
*
__restrict__
posDelta
,
const
mixed
4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
real
fscale
=
tauDeltaT
/
(
real
)
0xFFFFFFFF
;
const
mixed
fscale
=
tauDeltaT
/
(
mixed
)
0xFFFFFFFF
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
if
(
invMass
!=
0
)
{
if
(
invMass
!=
0
)
{
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
...
@@ -21,17 +21,30 @@ extern "C" __global__ void integrateBrownianPart1(real tauDeltaT, real noiseAmpl
...
@@ -21,17 +21,30 @@ extern "C" __global__ void integrateBrownianPart1(real tauDeltaT, real noiseAmpl
* Perform the second step of Brownian integration.
* Perform the second step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart2
(
real
deltaT
,
real4
*
posq
,
real4
*
velm
,
const
real
4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
integrateBrownianPart2
(
mixed
deltaT
,
real4
*
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
velm
,
const
mixed
4
*
__restrict__
posDelta
)
{
const
real
oneOverDeltaT
=
RECIP
(
deltaT
);
const
mixed
oneOverDeltaT
=
RECIP
(
deltaT
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
if
(
velm
[
index
].
w
!=
0
)
{
if
(
velm
[
index
].
w
!=
0
)
{
real
4
delta
=
posDelta
[
index
];
mixed
4
delta
=
posDelta
[
index
];
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
velm
[
index
].
y
=
oneOverDeltaT
*
delta
.
y
;
velm
[
index
].
y
=
oneOverDeltaT
*
delta
.
y
;
velm
[
index
].
z
=
oneOverDeltaT
*
delta
.
z
;
velm
[
index
].
z
=
oneOverDeltaT
*
delta
.
z
;
posq
[
index
].
x
=
posq
[
index
].
x
+
delta
.
x
;
#ifdef USE_MIXED_PRECISION
posq
[
index
].
y
=
posq
[
index
].
y
+
delta
.
y
;
real4
pos1
=
posq
[
index
];
posq
[
index
].
z
=
posq
[
index
].
z
+
delta
.
z
;
real4
pos2
=
posqCorrection
[
index
];
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
#else
real4
pos
=
posq
[
index
];
#endif
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
#else
posq
[
index
]
=
pos
;
#endif
}
}
}
}
}
}
platforms/cuda/src/kernels/constraints.cu
View file @
8cda1b79
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
position
=
posq
[
index
];
#ifdef USE_MIXED_PRECISION
position
.
x
+=
posDelta
[
index
].
x
;
real4
pos1
=
posq
[
index
];
position
.
y
+=
posDelta
[
index
].
y
;
real4
pos2
=
posqCorrection
[
index
];
position
.
z
+=
posDelta
[
index
].
z
;
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
posq
[
index
]
=
position
;
#else
mixed4
pos
=
posq
[
index
];
#endif
pos
.
x
+=
posDelta
[
index
].
x
;
pos
.
y
+=
posDelta
[
index
].
y
;
pos
.
z
+=
posDelta
[
index
].
z
;
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
#else
posq
[
index
]
=
pos
;
#endif
}
}
}
}
platforms/cuda/src/kernels/customIntegrator.cu
View file @
8cda1b79
extern
"C"
__global__
void
computeSum
(
const
real
*
__restrict__
sumBuffer
,
real
*
result
)
{
extern
"C"
__global__
void
compute
Float
Sum
(
const
float
*
__restrict__
sumBuffer
,
float
*
result
)
{
__shared__
real
tempBuffer
[
WORK_GROUP_SIZE
];
__shared__
float
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
const
unsigned
int
thread
=
threadIdx
.
x
;
real
sum
=
0
;
float
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
SUM_BUFFER_SIZE
;
index
+=
blockDim
.
x
)
for
(
unsigned
int
index
=
thread
;
index
<
SUM_BUFFER_SIZE
;
index
+=
blockDim
.
x
)
sum
+=
sumBuffer
[
index
];
sum
+=
sumBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
tempBuffer
[
thread
]
=
sum
;
...
@@ -14,14 +14,41 @@ extern "C" __global__ void computeSum(const real* __restrict__ sumBuffer, real*
...
@@ -14,14 +14,41 @@ extern "C" __global__ void computeSum(const real* __restrict__ sumBuffer, real*
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
}
}
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
computeDoubleSum
(
const
double
*
__restrict__
sumBuffer
,
double
*
result
)
{
__shared__
double
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
double
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
SUM_BUFFER_SIZE
;
index
+=
blockDim
.
x
)
sum
+=
sumBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
WORK_GROUP_SIZE
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
WORK_GROUP_SIZE
)
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
}
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
position
=
posq
[
index
];
#ifdef USE_MIXED_PRECISION
position
.
x
+=
posDelta
[
index
].
x
;
real4
pos1
=
posq
[
index
];
position
.
y
+=
posDelta
[
index
].
y
;
real4
pos2
=
posqCorrection
[
index
];
position
.
z
+=
posDelta
[
index
].
z
;
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
posq
[
index
]
=
position
;
#else
posDelta
[
index
]
=
make_real4
(
0
,
0
,
0
,
0
);
real4
pos
=
posq
[
index
];
#endif
pos
.
x
+=
posDelta
[
index
].
x
;
pos
.
y
+=
posDelta
[
index
].
y
;
pos
.
z
+=
posDelta
[
index
].
z
;
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
#else
posq
[
index
]
=
pos
;
#endif
posDelta
[
index
]
=
make_mixed4
(
0
,
0
,
0
,
0
);
}
}
}
}
...
...
platforms/cuda/src/kernels/customIntegratorGlobal.cu
View file @
8cda1b79
extern
"C"
__global__
void
computeGlobal
(
real
2
*
__restrict__
dt
,
real
*
__restrict__
globals
,
real
*
__restrict__
params
,
extern
"C"
__global__
void
computeGlobal
(
mixed
2
*
__restrict__
dt
,
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
params
,
float
uniform
,
float
gaussian
,
const
real
*
__restrict__
energy
)
{
float
uniform
,
float
gaussian
,
const
real
*
__restrict__
energy
)
{
COMPUTE_STEP
COMPUTE_STEP
}
}
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
8cda1b79
inline
__device__
double4
convertToDouble4
(
real4
a
)
{
/**
* Load the position of a particle.
*/
inline
__device__
mixed4
loadPos
(
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
posqCorrection
,
int
index
)
{
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
index
];
real4
pos2
=
posqCorrection
[
index
];
return
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
#else
return
posq
[
index
];
#endif
}
/**
* Store the position of a particle.
*/
inline
__device__
void
storePos
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
int
index
,
mixed4
pos
)
{
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
#else
posq
[
index
]
=
pos
;
#endif
}
inline
__device__
double4
convertToDouble4
(
mixed4
a
)
{
return
make_double4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
return
make_double4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
}
}
inline
__device__
real
4
convertFromDouble4
(
double4
a
)
{
inline
__device__
mixed
4
convertFromDouble4
(
double4
a
)
{
return
make_
real
4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
return
make_
mixed
4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
}
}
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
pos
Delta
,
real
4
*
__restrict__
velm
,
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
pos
qCorrection
,
mixed
4
*
__restrict__
posDelta
,
const
long
long
*
__restrict__
force
,
const
real
2
*
__restrict__
dt
,
const
real
*
__restrict__
globals
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed
2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
real
*
__restrict__
params
,
real
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
randomIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
randomIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
randomIndex
+=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
#ifdef LOAD_POS_AS_DELTA
double4
position
=
convertToDouble4
(
posq
[
index
]
+
posDelta
[
index
]);
double4
position
=
convertToDouble4
(
loadPos
(
posq
,
posqCorrection
,
index
)
+
posDelta
[
index
]);
#else
#else
double4
position
=
convertToDouble4
(
posq
[
index
]
);
double4
position
=
convertToDouble4
(
loadPos
(
posq
,
posqCorrection
,
index
)
);
#endif
#endif
double4
velocity
=
convertToDouble4
(
velm
[
index
]);
double4
velocity
=
convertToDouble4
(
velm
[
index
]);
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
8cda1b79
...
@@ -779,10 +779,11 @@ inline __device__ real3 loadForce(int index, long long* __restrict__ force) {
...
@@ -779,10 +779,11 @@ inline __device__ real3 loadForce(int index, long long* __restrict__ force) {
return
make_real3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
return
make_real3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
}
}
inline
__device__
void
storeForce
(
int
index
,
long
long
*
__restrict__
force
,
real3
value
)
{
inline
__device__
void
addForce
(
int
index
,
long
long
*
__restrict__
force
,
real3
value
)
{
force
[
index
]
=
(
long
long
)
(
value
.
x
*
0xFFFFFFFF
);
unsigned
long
long
*
f
=
(
unsigned
long
long
*
)
force
;
force
[
index
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
value
.
y
*
0xFFFFFFFF
);
atomicAdd
(
&
f
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
.
x
*
0xFFFFFFFF
)));
force
[
index
+
2
*
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
value
.
z
*
0xFFFFFFFF
);
atomicAdd
(
&
f
[
index
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
[
index
+
PADDED_NUM_ATOMS
*
2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
.
z
*
0xFFFFFFFF
)));
}
}
/**
/**
...
@@ -799,12 +800,8 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
...
@@ -799,12 +800,8 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
int4
atoms
=
avg2Atoms
[
index
];
int4
atoms
=
avg2Atoms
[
index
];
real2
weights
=
avg2Weights
[
index
];
real2
weights
=
avg2Weights
[
index
];
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f1
=
loadForce
(
atoms
.
y
,
force
);
addForce
(
atoms
.
y
,
force
,
f
*
weights
.
x
);
real3
f2
=
loadForce
(
atoms
.
z
,
force
);
addForce
(
atoms
.
z
,
force
,
f
*
weights
.
y
);
f1
+=
f
*
weights
.
x
;
f2
+=
f
*
weights
.
y
;
storeForce
(
atoms
.
y
,
force
,
f1
);
storeForce
(
atoms
.
z
,
force
,
f2
);
}
}
// Three particle average sites.
// Three particle average sites.
...
@@ -813,15 +810,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
...
@@ -813,15 +810,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
int4
atoms
=
avg3Atoms
[
index
];
int4
atoms
=
avg3Atoms
[
index
];
real4
weights
=
avg3Weights
[
index
];
real4
weights
=
avg3Weights
[
index
];
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f1
=
loadForce
(
atoms
.
y
,
force
);
addForce
(
atoms
.
y
,
force
,
f
*
weights
.
x
);
real3
f2
=
loadForce
(
atoms
.
z
,
force
);
addForce
(
atoms
.
z
,
force
,
f
*
weights
.
y
);
real3
f3
=
loadForce
(
atoms
.
w
,
force
);
addForce
(
atoms
.
w
,
force
,
f
*
weights
.
z
);
f1
+=
f
*
weights
.
x
;
f2
+=
f
*
weights
.
y
;
f3
+=
f
*
weights
.
z
;
storeForce
(
atoms
.
y
,
force
,
f1
);
storeForce
(
atoms
.
z
,
force
,
f2
);
storeForce
(
atoms
.
w
,
force
,
f3
);
}
}
// Out of plane sites.
// Out of plane sites.
...
@@ -835,20 +826,14 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
...
@@ -835,20 +826,14 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
mixed4
v12
=
pos2
-
pos1
;
mixed4
v12
=
pos2
-
pos1
;
mixed4
v13
=
pos3
-
pos1
;
mixed4
v13
=
pos3
-
pos1
;
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f1
=
loadForce
(
atoms
.
y
,
force
);
real3
f2
=
loadForce
(
atoms
.
z
,
force
);
real3
f3
=
loadForce
(
atoms
.
w
,
force
);
real3
fp2
=
make_real3
((
real
)
(
weights
.
x
*
f
.
x
-
weights
.
z
*
v13
.
z
*
f
.
y
+
weights
.
z
*
v13
.
y
*
f
.
z
),
real3
fp2
=
make_real3
((
real
)
(
weights
.
x
*
f
.
x
-
weights
.
z
*
v13
.
z
*
f
.
y
+
weights
.
z
*
v13
.
y
*
f
.
z
),
(
real
)
(
weights
.
z
*
v13
.
z
*
f
.
x
+
weights
.
x
*
f
.
y
-
weights
.
z
*
v13
.
x
*
f
.
z
),
(
real
)
(
weights
.
z
*
v13
.
z
*
f
.
x
+
weights
.
x
*
f
.
y
-
weights
.
z
*
v13
.
x
*
f
.
z
),
(
real
)
(
-
weights
.
z
*
v13
.
y
*
f
.
x
+
weights
.
z
*
v13
.
x
*
f
.
y
+
weights
.
x
*
f
.
z
));
(
real
)
(
-
weights
.
z
*
v13
.
y
*
f
.
x
+
weights
.
z
*
v13
.
x
*
f
.
y
+
weights
.
x
*
f
.
z
));
real3
fp3
=
make_real3
((
real
)
(
weights
.
y
*
f
.
x
+
weights
.
z
*
v12
.
z
*
f
.
y
-
weights
.
z
*
v12
.
y
*
f
.
z
),
real3
fp3
=
make_real3
((
real
)
(
weights
.
y
*
f
.
x
+
weights
.
z
*
v12
.
z
*
f
.
y
-
weights
.
z
*
v12
.
y
*
f
.
z
),
(
real
)
(
-
weights
.
z
*
v12
.
z
*
f
.
x
+
weights
.
y
*
f
.
y
+
weights
.
z
*
v12
.
x
*
f
.
z
),
(
real
)
(
-
weights
.
z
*
v12
.
z
*
f
.
x
+
weights
.
y
*
f
.
y
+
weights
.
z
*
v12
.
x
*
f
.
z
),
(
real
)
(
weights
.
z
*
v12
.
y
*
f
.
x
-
weights
.
z
*
v12
.
x
*
f
.
y
+
weights
.
y
*
f
.
z
));
(
real
)
(
weights
.
z
*
v12
.
y
*
f
.
x
-
weights
.
z
*
v12
.
x
*
f
.
y
+
weights
.
y
*
f
.
z
));
f1
+=
f
-
fp2
-
fp3
;
addForce
(
atoms
.
y
,
force
,
f
-
fp2
-
fp3
);
f2
+=
fp2
;
addForce
(
atoms
.
z
,
force
,
fp2
);
f3
+=
fp3
;
addForce
(
atoms
.
w
,
force
,
fp3
);
storeForce
(
atoms
.
y
,
force
,
f1
);
storeForce
(
atoms
.
z
,
force
,
f2
);
storeForce
(
atoms
.
w
,
force
,
f3
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment