Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8e725a70
Commit
8e725a70
authored
Jul 09, 2013
by
Lee-Ping
Browse files
Merge branch 'master' of
https://github.com/SimTk/openmm
parents
4334eca9
0e5073cf
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
232 additions
and
68 deletions
+232
-68
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+42
-25
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+3
-1
platforms/cuda/src/kernels/customIntegrator.cu
platforms/cuda/src/kernels/customIntegrator.cu
+2
-2
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+3
-5
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
+67
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+43
-28
platforms/opencl/src/kernels/customIntegrator.cl
platforms/opencl/src/kernels/customIntegrator.cl
+2
-2
platforms/opencl/src/kernels/customIntegratorPerDof.cl
platforms/opencl/src/kernels/customIntegratorPerDof.cl
+3
-5
platforms/opencl/tests/TestOpenCLCustomIntegrator.cpp
platforms/opencl/tests/TestOpenCLCustomIntegrator.cpp
+67
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
8e725a70
...
@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Initialize the random number generator.
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Build a list of all variables that affect the forces, so we can tell which
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
// steps invalidate them.
...
@@ -4858,10 +4842,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4858,10 +4842,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
)
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
usesVariable
(
expression
[
step
],
"uniform"
)
&&
!
usesVariable
(
expression
[
step
],
"gaussian"
))
merged
[
step
]
=
true
;
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
&&
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
!
usesVariable
(
expression
[
step
],
"uniform"
))
merged
[
step
]
=
true
;
merged
[
step
]
=
true
;
}
}
...
@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
...
@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += NUM_ATOMS;
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += NUM_ATOMS;
\n
"
;
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
...
@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
()
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
...
@@ -5112,7 +5123,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -5112,7 +5123,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Loop over computation steps in the integrator and execute them.
// Loop over computation steps in the integrator and execute them.
void
*
randomArgs
[]
=
{
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
int
maxUniformRandoms
=
uniformRandoms
->
getSize
();
void
*
randomArgs
[]
=
{
&
maxUniformRandoms
,
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
int
lastForceGroups
=
context
.
getLastForceGroups
();
int
lastForceGroups
=
context
.
getLastForceGroups
();
...
@@ -5161,7 +5173,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -5161,7 +5173,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
@@ -5176,7 +5190,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -5176,7 +5190,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
clearBuffer
(
*
sumBuffer
);
...
@@ -5227,6 +5243,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
...
@@ -5227,6 +5243,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
int
randomIndex
=
0
;
int
randomIndex
=
0
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
9
]
=
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
();
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
8e725a70
...
@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
bool
useShuffle
=
(
context
.
getComputeCapability
()
>=
3.0
);
int
cudaVersion
;
cuDriverGetVersion
(
&
cudaVersion
);
bool
useShuffle
=
(
context
.
getComputeCapability
()
>=
3.0
&&
cudaVersion
>=
5050
);
// Part 1. Defines for on diagonal exclusion tiles
// Part 1. Defines for on diagonal exclusion tiles
stringstream
loadLocal1
;
stringstream
loadLocal1
;
...
...
platforms/cuda/src/kernels/customIntegrator.cu
View file @
8e725a70
...
@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4*
...
@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4*
}
}
}
}
extern
"C"
__global__
void
generateRandomNumbers
(
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
extern
"C"
__global__
void
generateRandomNumbers
(
int
numValues
,
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
uint4
state
=
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
];
uint4
state
=
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
];
unsigned
int
carry
=
0
;
unsigned
int
carry
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numValues
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Generate three uniform random numbers.
// Generate three uniform random numbers.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
x
=
state
.
x
*
69069
+
1
;
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
8e725a70
...
@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
...
@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
random
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussianBase
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
#ifdef LOAD_POS_AS_DELTA
...
@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
...
@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double
mass
=
1.0
/
velocity
.
w
;
double
mass
=
1.0
/
velocity
.
w
;
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
float4
gaussian
=
gaussian
Values
[
random
Index
]
;
int
gaussian
Index
=
gaussian
Base
Index
;
float4
uniform
=
uniformValues
[
index
]
;
int
uniform
Index
=
0
;
COMPUTE_STEP
COMPUTE_STEP
}
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
View file @
8e725a70
...
@@ -651,6 +651,72 @@ void testRespa() {
...
@@ -651,6 +651,72 @@ void testRespa() {
}
}
}
}
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void
testMergedRandoms
()
{
const
int
numParticles
=
10
;
const
int
numSteps
=
10
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addPerDofVariable
(
"dofUniform1"
,
0
);
integrator
.
addPerDofVariable
(
"dofUniform2"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian1"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian2"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform1"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform2"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian1"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian2"
,
0
);
integrator
.
addComputePerDof
(
"dofUniform1"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofUniform2"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofGaussian1"
,
"gaussian"
);
integrator
.
addComputePerDof
(
"dofGaussian2"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalUniform1"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalUniform2"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalGaussian1"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalGaussian2"
,
"gaussian"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the random numbers are computed correctly.
vector
<
Vec3
>
values1
,
values2
;
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integrator
.
step
(
1
);
integrator
.
getPerDofVariable
(
0
,
values1
);
integrator
.
getPerDofVariable
(
1
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
}
integrator
.
getPerDofVariable
(
2
,
values1
);
integrator
.
getPerDofVariable
(
3
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
double
v1
=
integrator
.
getGlobalVariable
(
0
);
double
v2
=
integrator
.
getGlobalVariable
(
1
);
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
v1
=
integrator
.
getGlobalVariable
(
2
);
v2
=
integrator
.
getGlobalVariable
(
3
);
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
...
@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables
();
testPerDofVariables
();
testForceGroups
();
testForceGroups
();
testRespa
();
testRespa
();
testMergedRandoms
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
8e725a70
...
@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
// Initialize the random number generator.
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
cl
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
randomSeed
->
getDeviceBuffer
());
// Build a list of all variables that affect the forces, so we can tell which
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
// steps invalidate them.
...
@@ -5088,10 +5070,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5088,10 +5070,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
)
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
usesVariable
(
expression
[
step
],
"uniform"
)
&&
!
usesVariable
(
expression
[
step
],
"gaussian"
))
merged
[
step
]
=
true
;
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
&&
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
!
usesVariable
(
expression
[
step
],
"uniform"
))
merged
[
step
]
=
true
;
merged
[
step
]
=
true
;
}
}
...
@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
...
@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += NUM_ATOMS;
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += NUM_ATOMS;
\n
"
;
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
index
+=
3
;
index
++
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
@@ -5229,6 +5217,28 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5229,6 +5217,28 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl_int
>
(
0
,
maxUniformRandoms
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
randomSeed
->
getDeviceBuffer
());
// Create the kernel for summing the potential energy.
// Create the kernel for summing the potential energy.
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
...
@@ -5274,8 +5284,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5274,8 +5284,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
index
+=
2
;
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
0
);
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
...
@@ -5392,6 +5401,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -5392,6 +5401,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
kernels
[
i
][
0
],
numAtoms
);
cl
.
executeKernel
(
kernels
[
i
][
0
],
numAtoms
);
...
@@ -5403,6 +5414,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -5403,6 +5414,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
clearBuffer
(
*
sumBuffer
);
cl
.
clearBuffer
(
*
sumBuffer
);
...
@@ -5454,6 +5467,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
...
@@ -5454,6 +5467,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
forcesAreValid
=
true
;
forcesAreValid
=
true
;
}
}
cl
.
clearBuffer
(
*
sumBuffer
);
cl
.
clearBuffer
(
*
sumBuffer
);
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
cl
.
getIntegrationUtilities
().
getRandom
().
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
10
,
0
);
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
sumKineticEnergyKernel
,
OpenCLContext
::
ThreadBlockSize
,
OpenCLContext
::
ThreadBlockSize
);
cl
.
executeKernel
(
sumKineticEnergyKernel
,
OpenCLContext
::
ThreadBlockSize
,
OpenCLContext
::
ThreadBlockSize
);
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
...
...
platforms/opencl/src/kernels/customIntegrator.cl
View file @
8e725a70
...
@@ -52,10 +52,10 @@ __kernel void applyPositionDeltas(__global real4* restrict posq, __global real4*
...
@@ -52,10 +52,10 @@ __kernel void applyPositionDeltas(__global real4* restrict posq, __global real4*
}
}
}
}
__kernel
void
generateRandomNumbers
(
__global
float4*
restrict
random,
__global
uint4*
restrict
seed
)
{
__kernel
void
generateRandomNumbers
(
int
numValues,
__global
float4*
restrict
random,
__global
uint4*
restrict
seed
)
{
uint4
state
=
seed[get_global_id
(
0
)
]
;
uint4
state
=
seed[get_global_id
(
0
)
]
;
unsigned
int
carry
=
0
;
unsigned
int
carry
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index <
NUM_ATOMS
; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index <
numValues
; index += get_global_size(0)) {
//
Generate
three
uniform
random
numbers.
//
Generate
three
uniform
random
numbers.
state.x
=
state.x
*
69069
+
1
;
state.x
=
state.x
*
69069
+
1
;
...
...
platforms/opencl/src/kernels/customIntegratorPerDof.cl
View file @
8e725a70
...
@@ -26,11 +26,10 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
...
@@ -26,11 +26,10 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel
void
computePerDof
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta,
__kernel
void
computePerDof
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed2*
restrict
dt,
__global
const
mixed*
restrict
globals,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed2*
restrict
dt,
__global
const
mixed*
restrict
globals,
__global
const
mixed*
restrict
params,
__global
mixed*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
__global
const
mixed*
restrict
params,
__global
mixed*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
unsigned
int
random
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
unsigned
int
gaussianBase
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt[0].y
;
mixed
stepSize
=
dt[0].y
;
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
randomIndex
+=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#
ifdef
LOAD_POS_AS_DELTA
#
ifdef
LOAD_POS_AS_DELTA
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
...
@@ -41,11 +40,10 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
...
@@ -41,11 +40,10 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
real4
f
=
force[index]
;
real4
f
=
force[index]
;
mixed
mass
=
1/velocity.w
;
mixed
mass
=
1/velocity.w
;
if
(
velocity.w
!=
0.0
)
{
if
(
velocity.w
!=
0.0
)
{
float4
gaussian
=
gaussian
Values[random
Index
]
;
int
gaussian
Index
=
gaussian
Base
Index
;
float4
uniform
=
uniformValues[index]
;
int
uniform
Index
=
0
;
COMPUTE_STEP
COMPUTE_STEP
}
}
randomIndex
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
}
}
platforms/opencl/tests/TestOpenCLCustomIntegrator.cpp
View file @
8e725a70
...
@@ -651,6 +651,72 @@ void testRespa() {
...
@@ -651,6 +651,72 @@ void testRespa() {
}
}
}
}
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void
testMergedRandoms
()
{
const
int
numParticles
=
10
;
const
int
numSteps
=
10
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addPerDofVariable
(
"dofUniform1"
,
0
);
integrator
.
addPerDofVariable
(
"dofUniform2"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian1"
,
0
);
integrator
.
addPerDofVariable
(
"dofGaussian2"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform1"
,
0
);
integrator
.
addGlobalVariable
(
"globalUniform2"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian1"
,
0
);
integrator
.
addGlobalVariable
(
"globalGaussian2"
,
0
);
integrator
.
addComputePerDof
(
"dofUniform1"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofUniform2"
,
"uniform"
);
integrator
.
addComputePerDof
(
"dofGaussian1"
,
"gaussian"
);
integrator
.
addComputePerDof
(
"dofGaussian2"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalUniform1"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalUniform2"
,
"uniform"
);
integrator
.
addComputeGlobal
(
"globalGaussian1"
,
"gaussian"
);
integrator
.
addComputeGlobal
(
"globalGaussian2"
,
"gaussian"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the random numbers are computed correctly.
vector
<
Vec3
>
values1
,
values2
;
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integrator
.
step
(
1
);
integrator
.
getPerDofVariable
(
0
,
values1
);
integrator
.
getPerDofVariable
(
1
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
}
integrator
.
getPerDofVariable
(
2
,
values1
);
integrator
.
getPerDofVariable
(
3
,
values2
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v1
=
values1
[
i
][
j
];
double
v2
=
values2
[
i
][
j
];
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
double
v1
=
integrator
.
getGlobalVariable
(
0
);
double
v2
=
integrator
.
getGlobalVariable
(
1
);
ASSERT
(
v1
>=
0
&&
v1
<
1
);
ASSERT
(
v2
>=
0
&&
v2
<
1
);
ASSERT
(
v1
!=
v2
);
v1
=
integrator
.
getGlobalVariable
(
2
);
v2
=
integrator
.
getGlobalVariable
(
3
);
ASSERT
(
v1
>=
-
10
&&
v1
<
10
);
ASSERT
(
v2
>=
-
10
&&
v2
<
10
);
ASSERT
(
v1
!=
v2
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
...
@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables
();
testPerDofVariables
();
testForceGroups
();
testForceGroups
();
testRespa
();
testRespa
();
testMergedRandoms
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment