Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cc279e94
Commit
cc279e94
authored
Jul 05, 2013
by
peastman
Browse files
Fixed bugs in handling of random numbers in CustomIntegrator (see bug 1883).
parent
7ba1a10c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
82 additions
and
56 deletions
+82
-56
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+37
-21
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+3
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+39
-25
platforms/opencl/src/kernels/customIntegratorPerDof.cl
platforms/opencl/src/kernels/customIntegratorPerDof.cl
+3
-5
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
cc279e94
...
...
@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Initialize the random number generator.
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
...
...
@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
...
...
@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += blockDim.x*gridDim.x;
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += blockDim.x*gridDim.x;
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
...
@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
...
@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
...
...
@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
&
globalValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
()
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
...
...
@@ -5161,7 +5172,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
...
...
@@ -5176,7 +5189,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
9
]
=
&
integration
.
getRandom
().
getDevicePointer
();
kernelArgs
[
i
][
0
][
10
]
=
&
randomIndex
;
kernelArgs
[
i
][
0
][
11
]
=
&
uniformRandoms
->
getDevicePointer
();
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
clearBuffer
(
*
sumBuffer
);
...
...
@@ -5227,6 +5242,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
int
randomIndex
=
0
;
kineticEnergyArgs
[
1
]
=
&
posCorrection
;
kineticEnergyArgs
[
9
]
=
&
cu
.
getIntegrationUtilities
().
getRandom
().
getDevicePointer
();
kineticEnergyArgs
[
10
]
=
&
randomIndex
;
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kineticEnergyKernel
,
&
kineticEnergyArgs
[
0
],
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
cc279e94
...
...
@@ -34,11 +34,12 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
random
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussian
Index
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
gaussianIndex
+=
index
;
int
uniformIndex
=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
...
...
@@ -50,11 +51,8 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double
mass
=
1.0
/
velocity
.
w
;
if
(
velocity
.
w
!=
0.0
)
{
float4
gaussian
=
gaussianValues
[
randomIndex
];
float4
uniform
=
uniformValues
[
index
];
COMPUTE_STEP
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/opencl/src/OpenCLKernels.cpp
View file @
cc279e94
...
...
@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
defines
[
"NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cl
.
intToString
(
OpenCLContext
::
ThreadBlockSize
);
// Initialize the random number generator.
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
cl
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
randomSeed
->
getDeviceBuffer
());
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
...
...
@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
...
...
@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute
<<
"perDofValues"
<<
cl
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cl
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
if
(
numGaussian
>
0
)
compute
<<
"gaussianIndex += get_global_size(0);
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"uniformIndex += get_global_size(0);
\n
"
;
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
...
...
@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
index
++
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
index
+=
3
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
perDofValues
->
getBuffers
()[
i
].
getMemory
());
...
...
@@ -5229,6 +5217,27 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
// Initialize the random number generator.
int
maxUniformRandoms
=
1
;
for
(
int
i
=
0
;
i
<
(
int
)
requiredUniform
.
size
();
i
++
)
maxUniformRandoms
=
max
(
maxUniformRandoms
,
requiredUniform
[
i
]);
uniformRandoms
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
maxUniformRandoms
,
"uniformRandoms"
);
randomSeed
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
mm_int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
cl
::
Program
randomProgram
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cl
::
Kernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
uniformRandoms
->
getDeviceBuffer
());
randomKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
randomSeed
->
getDeviceBuffer
());
// Create the kernel for summing the potential energy.
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
customIntegrator
,
defines
);
...
...
@@ -5274,8 +5283,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globalValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
contextParameterValues
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
sumBuffer
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
integration
.
getRandom
().
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
0
);
index
+=
2
;
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
uniformRandoms
->
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
potentialEnergy
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
...
...
@@ -5392,6 +5400,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
executeKernel
(
kernels
[
i
][
0
],
numAtoms
);
...
...
@@ -5403,6 +5413,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
kernels
[
i
][
0
].
setArg
<
cl_uint
>
(
10
,
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]));
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
9
,
integration
.
getRandom
().
getDeviceBuffer
());
kernels
[
i
][
0
].
setArg
<
cl
::
Buffer
>
(
11
,
uniformRandoms
->
getDeviceBuffer
());
if
(
requiredUniform
[
i
]
>
0
)
cl
.
executeKernel
(
randomKernel
,
numAtoms
);
cl
.
clearBuffer
(
*
sumBuffer
);
...
...
@@ -5454,6 +5466,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
forcesAreValid
=
true
;
}
cl
.
clearBuffer
(
*
sumBuffer
);
kineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
cl
.
getIntegrationUtilities
().
getRandom
().
getDeviceBuffer
());
kineticEnergyKernel
.
setArg
<
cl_uint
>
(
10
,
0
);
cl
.
executeKernel
(
kineticEnergyKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
sumKineticEnergyKernel
,
OpenCLContext
::
ThreadBlockSize
,
OpenCLContext
::
ThreadBlockSize
);
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
{
...
...
platforms/opencl/src/kernels/customIntegratorPerDof.cl
View file @
cc279e94
...
...
@@ -26,11 +26,12 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel
void
computePerDof
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed2*
restrict
dt,
__global
const
mixed*
restrict
globals,
__global
const
mixed*
restrict
params,
__global
mixed*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
unsigned
int
random
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
unsigned
int
gaussian
Index,
__global
const
float4*
restrict
uniformValues,
__global
const
real*
restrict
energy
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt[0].y
;
int
index
=
get_global_id
(
0
)
;
randomIndex
+=
index
;
gaussianIndex
+=
index
;
int
uniformIndex
=
index
;
while
(
index
<
NUM_ATOMS
)
{
#
ifdef
LOAD_POS_AS_DELTA
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
...
...
@@ -41,11 +42,8 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
real4
f
=
force[index]
;
mixed
mass
=
1/velocity.w
;
if
(
velocity.w
!=
0.0
)
{
float4
gaussian
=
gaussianValues[randomIndex]
;
float4
uniform
=
uniformValues[index]
;
COMPUTE_STEP
}
randomIndex
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment