Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d9756688
Unverified
Commit
d9756688
authored
Feb 16, 2018
by
peastman
Committed by
GitHub
Feb 16, 2018
Browse files
Merge pull request #1992 from peastman/arrays
Simplification to CudaArray and OpenCLArray
parents
979525c7
c75dba47
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
104 additions
and
134 deletions
+104
-134
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+7
-9
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+39
-52
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+7
-8
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+44
-57
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+7
-8
No files found.
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
View file @
d9756688
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLCalcDrudeForceKernel
:
public
CalcDrudeForceKernel
{
public:
OpenCLCalcDrudeForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
,
particleParams
(
NULL
),
pairParams
(
NULL
)
{
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLCalcDrudeForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -73,8 +72,8 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
DrudeForce
&
force
);
private:
OpenCLContext
&
cl
;
OpenCLArray
*
particleParams
;
OpenCLArray
*
pairParams
;
OpenCLArray
particleParams
;
OpenCLArray
pairParams
;
};
/**
...
...
@@ -83,9 +82,8 @@ private:
class
OpenCLIntegrateDrudeLangevinStepKernel
:
public
IntegrateDrudeLangevinStepKernel
{
public:
OpenCLIntegrateDrudeLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
normalParticles
(
NULL
),
pairParticles
(
NULL
)
{
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
{
}
~
OpenCLIntegrateDrudeLangevinStepKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -112,8 +110,8 @@ private:
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
double
prevStepSize
;
OpenCLArray
*
normalParticles
;
OpenCLArray
*
pairParticles
;
OpenCLArray
normalParticles
;
OpenCLArray
pairParticles
;
cl
::
Kernel
kernel1
,
kernel2
,
hardwallKernel
;
};
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
d9756688
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
}
}
CudaIntegrateRPMDStepKernel
::~
CudaIntegrateRPMDStepKernel
()
{
if
(
forces
!=
NULL
)
delete
forces
;
if
(
positions
!=
NULL
)
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
CudaIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cu
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
...
...
@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
int
paddedParticles
=
cu
.
getPaddedNumAtoms
();
bool
useDoublePrecision
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
());
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
double4
)
:
sizeof
(
float4
));
forces
=
CudaArray
::
creat
e
<
long
long
>
(
cu
,
numCopies
*
paddedParticles
*
3
,
"rpmdForces"
);
positions
=
new
CudaArray
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
=
new
CudaArray
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
forces
.
initializ
e
<
long
long
>
(
cu
,
numCopies
*
paddedParticles
*
3
,
"rpmdForces"
);
positions
.
initialize
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
.
initialize
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if
(
useDoublePrecision
)
{
vector
<
double4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
double4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
else
{
vector
<
float4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
float4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
// Build a list of contractions.
...
...
@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
}
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
CudaArray
::
creat
e
<
long
long
>
(
cu
,
maxContractedCopies
*
paddedParticles
*
3
,
"rpmdContractedForces"
);
contractedPositions
=
new
CudaArray
(
cu
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
contractedForces
.
initializ
e
<
long
long
>
(
cu
,
maxContractedCopies
*
paddedParticles
*
3
,
"rpmdContractedForces"
);
contractedPositions
.
initialize
(
cu
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
// Create kernels.
...
...
@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
float
frictionFloat
=
(
float
)
friction
;
void
*
frictionPtr
=
(
useDoublePrecision
?
(
void
*
)
&
friction
:
(
void
*
)
&
frictionFloat
);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
numParticles
*
numCopies
);
void
*
pileArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
,
dtPtr
,
kTPtr
,
frictionPtr
};
void
*
pileArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
,
dtPtr
,
kTPtr
,
frictionPtr
};
if
(
integrator
.
getApplyThermostat
())
cu
.
executeKernel
(
pileKernel
,
pileArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Update positions and velocities.
void
*
stepArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
dtPtr
,
kTPtr
};
void
*
stepArgs
[]
=
{
&
positions
.
getDevicePointer
(),
&
velocities
.
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
dtPtr
,
kTPtr
};
cu
.
executeKernel
(
stepKernel
,
stepArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Calculate forces based on the updated positions.
...
...
@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// Update velocities.
void
*
velocitiesArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
dtPtr
};
void
*
velocitiesArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
dtPtr
};
cu
.
executeKernel
(
velocitiesKernel
,
velocitiesArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Apply the PILE-L thermostat again.
...
...
@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// the same translation to all the beads.
int
i
=
numCopies
-
1
;
void
*
args
[]
=
{
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
void
*
args
[]
=
{
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
translateKernel
,
args
,
cu
.
getNumAtoms
());
}
}
...
...
@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
...
...
@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if
(
initialBox
[
0
]
!=
finalBox
[
0
]
||
initialBox
[
1
]
!=
finalBox
[
1
]
||
initialBox
[
2
]
!=
finalBox
[
2
])
throw
OpenMMException
(
"Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead."
);
context
.
calcForcesAndEnergy
(
true
,
false
,
groupsNotContracted
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
...
...
@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Find the contracted positions.
void
*
contractPosArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
()};
void
*
contractPosArgs
[]
=
{
&
positions
.
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
()};
cu
.
executeKernel
(
positionContractionKernels
[
copies
],
contractPosArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Compute forces.
for
(
int
i
=
0
;
i
<
copies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
context
.
calcForcesAndEnergy
(
true
,
false
,
groupFlags
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
contractedForces
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
// Apply the forces to the original copies.
void
*
contractForceArgs
[]
=
{
&
forces
->
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
()};
void
*
contractForceArgs
[]
=
{
&
forces
.
getDevicePointer
(),
&
contractedForces
.
getDevicePointer
()};
cu
.
executeKernel
(
forceContractionKernels
[
copies
],
contractForceArgs
,
numParticles
*
numCopies
,
workgroupSize
);
}
if
(
groupsByCopies
.
size
()
>
0
)
{
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
int
i
=
numCopies
-
1
;
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
}
...
...
@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
}
void
CudaIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
if
(
!
positions
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
...
...
@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
}
else
if
(
cu
.
getUseMixedPrecision
())
{
vector
<
float4
>
posqf
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
vector
<
double4
>
posq
(
cu
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
}
else
{
vector
<
float4
>
posq
(
cu
.
getPaddedNumAtoms
());
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_float4
((
float
)
offsetPos
[
i
][
0
],
(
float
)
offsetPos
[
i
][
1
],
(
float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
posq
[
0
],
numParticles
*
sizeof
(
float4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
posq
[
0
],
numParticles
*
sizeof
(
float4
));
}
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
positions
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error uploading array "
<<
positions
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
if
(
!
velocities
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
...
...
@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
cu
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
make_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
result
=
cuMemcpyHtoD
(
velocities
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
velm
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
velocities
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
velm
[
0
],
numParticles
*
sizeof
(
double4
));
}
else
{
vector
<
float4
>
velm
(
cu
.
getPaddedNumAtoms
());
cu
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
make_float4
((
float
)
vel
[
i
][
0
],
(
float
)
vel
[
i
][
1
],
(
float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
result
=
cuMemcpyHtoD
(
velocities
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
velm
[
0
],
numParticles
*
sizeof
(
float4
));
result
=
cuMemcpyHtoD
(
velocities
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
velm
[
0
],
numParticles
*
sizeof
(
float4
));
}
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
velocities
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error uploading array "
<<
velocities
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
void
*
copyArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
copy
};
cu
.
executeKernel
(
copyToContextKernel
,
copyArgs
,
cu
.
getNumAtoms
());
}
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
View file @
d9756688
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -46,9 +46,8 @@ namespace OpenMM {
class
CudaIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
CudaIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
)
,
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
),
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
)
{
}
~
CudaIntegrateRPMDStepKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -91,11 +90,11 @@ private:
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
CudaArray
*
forces
;
CudaArray
*
positions
;
CudaArray
*
velocities
;
CudaArray
*
contractedForces
;
CudaArray
*
contractedPositions
;
CudaArray
forces
;
CudaArray
positions
;
CudaArray
velocities
;
CudaArray
contractedForces
;
CudaArray
contractedPositions
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
CUfunction
>
positionContractionKernels
;
std
::
map
<
int
,
CUfunction
>
forceContractionKernels
;
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
d9756688
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -41,19 +41,6 @@
using
namespace
OpenMM
;
using
namespace
std
;
OpenCLIntegrateRPMDStepKernel
::~
OpenCLIntegrateRPMDStepKernel
()
{
if
(
forces
!=
NULL
)
delete
forces
;
if
(
positions
!=
NULL
)
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
OpenCLIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cl
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
...
...
@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw
OpenMMException
(
"RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."
);
int
paddedParticles
=
cl
.
getPaddedNumAtoms
();
int
forceElementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
forces
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
forces
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
bool
useDoublePrecision
=
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
());
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
positions
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
positions
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
mm_double4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
else
{
vector
<
mm_float4
>
temp
(
positions
->
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
vector
<
mm_float4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
// Build a list of contractions.
...
...
@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
}
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedPositions
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
contractedForces
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedPositions
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
// Create kernels.
...
...
@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void
OpenCLIntegrateRPMDStepKernel
::
initializeKernels
(
ContextImpl
&
context
)
{
hasInitializedKernel
=
true
;
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getVelm
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getPosq
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForce
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
}
}
...
...
@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void
OpenCLIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
.
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
i
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
...
...
@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them.
if
(
groupsByCopies
.
size
()
>
0
)
{
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
.
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
int
groupFlags
=
g
.
second
;
...
...
@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if
(
groupsByCopies
.
size
()
>
0
)
{
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
numCopies
-
1
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
...
...
@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
}
void
OpenCLIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
if
(
!
positions
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
...
...
@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
else
if
(
cl
.
getUseMixedPrecision
())
{
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
...
...
@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector
<
mm_double4
>
posq
(
cl
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
else
{
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
());
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_float4
((
cl_float
)
offsetPos
[
i
][
0
],
(
cl_float
)
offsetPos
[
i
][
1
],
(
cl_float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
}
}
void
OpenCLIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
if
(
!
velocities
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
...
...
@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
}
else
{
vector
<
mm_float4
>
velm
(
cl
.
getPaddedNumAtoms
());
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_float4
((
cl_float
)
vel
[
i
][
0
],
(
cl_float
)
vel
[
i
][
1
],
(
cl_float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
}
}
void
OpenCLIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
if
(
!
hasInitializedKernel
)
initializeKernels
(
context
);
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
copy
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
View file @
d9756688
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
OpenCLIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
,
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
),
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
{
}
~
OpenCLIntegrateRPMDStepKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -92,11 +91,11 @@ private:
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
OpenCLArray
*
forces
;
OpenCLArray
*
positions
;
OpenCLArray
*
velocities
;
OpenCLArray
*
contractedForces
;
OpenCLArray
*
contractedPositions
;
OpenCLArray
forces
;
OpenCLArray
positions
;
OpenCLArray
velocities
;
OpenCLArray
contractedForces
;
OpenCLArray
contractedPositions
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
cl
::
Kernel
>
positionContractionKernels
;
std
::
map
<
int
,
cl
::
Kernel
>
forceContractionKernels
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment