Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cc279e94
"openmmapi/src/LangevinMiddleIntegrator.cpp" did not exist on "46b35a3627ad007f7bf5aea7f3a5b3c6a66f57f8"
Commit
cc279e94
authored
Jul 05, 2013
by
peastman
Browse files
Fixed bugs in handling of random numbers in CustomIntegrator (see bug 1883).
parent
7ba1a10c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
82 additions
and
56 deletions
+82
-56
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+37
-21
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+3
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+39
-25
platforms/opencl/src/kernels/customIntegratorPerDof.cl
platforms/opencl/src/kernels/customIntegratorPerDof.cl
+3
-5
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
cc279e94
...
@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Initialize the random number generator.
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Build a list of all variables that affect the forces, so we can tell which
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
// steps invalidate them.
...
@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
...
@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += blockDim.x*gridDim.x;
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += blockDim.x*gridDim.x;
\n
"
;
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
...
@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
()
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
...
@@ -5161,7 +5172,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -5161,7 +5172,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
@@ -5176,7 +5189,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -5176,7 +5189,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
clearBuffer
(
*
sumBuffer
);
...
@@ -5227,6 +5242,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
...
@@ -5227,6 +5242,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
int
randomIndex
=
0
;
int
randomIndex
=
0
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
9
]
=
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
();
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
cc279e94
...
@@ -34,11 +34,12 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
...
@@ -34,11 +34,12 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
random
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussian
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
gaussianIndex
+=
index
;
int
uniformIndex
=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
#ifdef LOAD_POS_AS_DELTA
...
@@ -50,11 +51,8 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
...
@@ -50,11 +51,8 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double
mass
=
1.0
/
velocity
.
w
;
double
mass
=
1.0
/
velocity
.
w
;
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
float4
gaussian
=
gaussianValues
[
randomIndex
];
float4
uniform
=
uniformValues
[
index
];
COMPUTE_STEP
COMPUTE_STEP
}
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
platforms/opencl/src/OpenCLKernels.cpp
View file @
cc279e94
...
@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
// Initialize the random number generator.
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
cl
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
randomSeed
->
getDeviceBuffer
());
// Build a list of all variables that affect the forces, so we can tell which
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
// steps invalidate them.
...
@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
...
@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += get_global_size(0);
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += get_global_size(0);
\n
"
;
compute
<<
"}
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
index
+=
3
;
index
++
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
@@ -5229,6 +5217,27 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5229,6 +5217,27 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
randomSeed
->
getDeviceBuffer
());
// Create the kernel for summing the potential energy.
// Create the kernel for summing the potential energy.
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
...
@@ -5274,8 +5283,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -5274,8 +5283,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
index
+=
2
;
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
0
);
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
...
@@ -5392,6 +5400,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -5392,6 +5400,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
kernels
[
i
][
0
],
numAtoms
);
cl
.
executeKernel
(
kernels
[
i
][
0
],
numAtoms
);
...
@@ -5403,6 +5413,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -5403,6 +5413,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
clearBuffer
(
*
sumBuffer
);
cl
.
clearBuffer
(
*
sumBuffer
);
...
@@ -5454,6 +5466,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
...
@@ -5454,6 +5466,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
forcesAreValid
=
true
;
forcesAreValid
=
true
;
}
}
cl
.
clearBuffer
(
*
sumBuffer
);
cl
.
clearBuffer
(
*
sumBuffer
);
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
cl
.
getIntegrationUtilities
().
getRandom
().
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
10
,
0
);
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
sumKineticEnergyKernel
,
OpenCLContext
::
ThreadBlockSize
,
OpenCLContext
::
ThreadBlockSize
);
cl
.
executeKernel
(
sumKineticEnergyKernel
,
OpenCLContext
::
ThreadBlockSize
,
OpenCLContext
::
ThreadBlockSize
);
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
...
...
platforms/opencl/src/kernels/customIntegratorPerDof.cl
View file @
cc279e94
...
@@ -26,11 +26,12 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
...
@@ -26,11 +26,12 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel
void
computePerDof
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta,
__kernel
void
computePerDof
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed2*
restrict
dt,
__global
const
mixed*
restrict
globals,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed2*
restrict
dt,
__global
const
mixed*
restrict
globals,
__global
const
mixed*
restrict
params,
__global
mixed*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
__global
const
mixed*
restrict
params,
__global
mixed*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
unsigned
int
random
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
unsigned
int
gaussian
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt[0].y
;
mixed
stepSize
=
dt[0].y
;
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
randomIndex
+=
index
;
gaussianIndex
+=
index
;
int
uniformIndex
=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#
ifdef
LOAD_POS_AS_DELTA
#
ifdef
LOAD_POS_AS_DELTA
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
...
@@ -41,11 +42,8 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
...
@@ -41,11 +42,8 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
real4
f
=
force[index]
;
real4
f
=
force[index]
;
mixed
mass
=
1/velocity.w
;
mixed
mass
=
1/velocity.w
;
if
(
velocity.w
!=
0.0
)
{
if
(
velocity.w
!=
0.0
)
{
float4
gaussian
=
gaussianValues[randomIndex]
;
float4
uniform
=
uniformValues[index]
;
COMPUTE_STEP
COMPUTE_STEP
}
}
randomIndex
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment