Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
88224d12
Unverified
Commit
88224d12
authored
Dec 11, 2019
by
Andy Simmonett
Browse files
Fix OpenCL and CUDA kernels after review
parent
1fea6058
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
12 deletions
+27
-12
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+10
-3
platforms/cuda/src/kernels/noseHooverChain.cu
platforms/cuda/src/kernels/noseHooverChain.cu
+4
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+11
-5
No files found.
platforms/cuda/include/CudaKernels.h
View file @
88224d12
...
...
@@ -1769,7 +1769,7 @@ public:
private:
int
sumWorkGroupSize
;
CudaContext
&
cu
;
CudaArray
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
CudaArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
CudaArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
CUfunction
reduceEnergyKernel
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
88224d12
...
...
@@ -8428,6 +8428,13 @@ void CudaNoseHooverChainKernel::initialize() {
computePairsKineticEnergyKernel = cu.getKernel(module, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cu.getKernel(module, "scalePairsVelocities");
int energyBufferSize = cu.getEnergyBuffer().getSize();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
energyBuffer.initialize<double2>(cu, energyBufferSize, "energyBuffer");
} else {
energyBuffer.initialize<float2>(cu, energyBufferSize, "energyBuffer");
}
}
std::pair<double, double> CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
...
...
@@ -8709,9 +8716,9 @@ std::pair<double, double> CudaNoseHooverChainKernel::computeMaskedKineticEnergy(
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize =
cu.getE
nergyBuffer
()
.getSize()
/ 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
void* args2[] = {&
cu.getE
nergyBuffer
()
.getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize,
2*
sumWorkGroupSize*
cu.getE
nergyBuffer
()
.getElementSize());
int bufferSize =
e
nergyBuffer.getSize()
;
void* args2[] = {&
e
nergyBuffer.getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, sumWorkGroupSize*
e
nergyBuffer.getElementSize());
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
...
...
platforms/cuda/src/kernels/noseHooverChain.cu
View file @
88224d12
...
...
@@ -125,8 +125,8 @@ extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFacto
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairs
[
index
].
x
;
int
atom2
=
pairs
[
index
].
y
;
mixed4
&
v1
=
velm
[
atom1
];
mixed4
&
v2
=
velm
[
atom2
];
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
...
...
@@ -143,6 +143,8 @@ extern "C" __global__ void scalePairsVelocities(mixed2 * __restrict__ scaleFacto
v2
.
x
=
absScale
*
cv
.
x
+
relScale
*
rv
.
x
*
m1
/
(
m1
+
m2
);
v2
.
y
=
absScale
*
cv
.
y
+
relScale
*
rv
.
y
*
m1
/
(
m1
+
m2
);
v2
.
z
=
absScale
*
cv
.
z
+
relScale
*
rv
.
z
*
m1
/
(
m1
+
m2
);
velm
[
atom1
]
=
v1
;
velm
[
atom2
]
=
v2
;
}
}
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
88224d12
...
...
@@ -1761,7 +1761,7 @@ public:
private:
int
sumWorkGroupSize
;
OpenCLContext
&
cl
;
OpenCLArray
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
OpenCLArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
OpenCLArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
cl
::
Kernel
>
propagateKernels
;
cl
::
Kernel
reduceEnergyKernel
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
88224d12
...
...
@@ -8855,6 +8855,12 @@ void OpenCLNoseHooverChainKernel::initialize() {
computePairsKineticEnergyKernel = cl::Kernel(program, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cl::Kernel(program, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cl::Kernel(program, "scalePairsVelocities");
int energyBufferSize = cl.getEnergyBuffer().getSize();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
energyBuffer.initialize<mm_double2>(cl, energyBufferSize, "energyBuffer");
} else {
energyBuffer.initialize<mm_float2>(cl, energyBufferSize, "energyBuffer");
}
}
std::pair<double, double> OpenCLNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
...
...
@@ -9145,28 +9151,28 @@ std::pair<double, double> OpenCLNoseHooverChainKernel::computeMaskedKineticEnerg
}
cl.clearBuffer(cl.getEnergyBuffer());
if (nAtoms) {
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0,
cl.getE
nergyBuffer
()
.getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0,
e
nergyBuffer.getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl_int>(1, nAtoms);
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(computeAtomsKineticEnergyKernel, nAtoms);
}
if (nPairs) {
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0,
cl.getE
nergyBuffer
()
.getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0,
e
nergyBuffer.getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl_int>(1, nPairs);
computePairsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(computePairsKineticEnergyKernel, nPairs);
}
int bufferSize =
cl.getE
nergyBuffer
()
.getSize()
/ 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
int bufferSize =
e
nergyBuffer.getSize()
;
int workGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512)
workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0,
cl.getE
nergyBuffer
()
.getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(0,
e
nergyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, bufferSize);
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4,
2*
workGroupSize*
cl.getE
nergyBuffer
()
.getElementSize(), NULL);
reduceEnergyKernel.setArg(4, workGroupSize*
e
nergyBuffer.getElementSize(), NULL);
cl.executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
std::pair<double, double> KEs = {0, 0};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment