Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d9756688
Unverified
Commit
d9756688
authored
Feb 16, 2018
by
peastman
Committed by
GitHub
Feb 16, 2018
Browse files
Merge pull request #1992 from peastman/arrays
Simplification to CudaArray and OpenCLArray
parents
979525c7
c75dba47
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
104 additions
and
134 deletions
+104
-134
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+7
-9
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+39
-52
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+7
-8
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+44
-57
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+7
-8
No files found.
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
View file @
d9756688
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -45,9 +45,8 @@ namespace OpenMM {
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLCalcDrudeForceKernel
:
public
CalcDrudeForceKernel
{
class
OpenCLCalcDrudeForceKernel
:
public
CalcDrudeForceKernel
{
public:
public:
OpenCLCalcDrudeForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
OpenCLCalcDrudeForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
,
particleParams
(
NULL
),
pairParams
(
NULL
)
{
CalcDrudeForceKernel
(
name
,
platform
),
cl
(
cl
)
{
}
}
~
OpenCLCalcDrudeForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -73,8 +72,8 @@ public:
...
@@ -73,8 +72,8 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
DrudeForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
DrudeForce
&
force
);
private:
private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLArray
*
particleParams
;
OpenCLArray
particleParams
;
OpenCLArray
*
pairParams
;
OpenCLArray
pairParams
;
};
};
/**
/**
...
@@ -83,9 +82,8 @@ private:
...
@@ -83,9 +82,8 @@ private:
class
OpenCLIntegrateDrudeLangevinStepKernel
:
public
IntegrateDrudeLangevinStepKernel
{
class
OpenCLIntegrateDrudeLangevinStepKernel
:
public
IntegrateDrudeLangevinStepKernel
{
public:
public:
OpenCLIntegrateDrudeLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
OpenCLIntegrateDrudeLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
,
normalParticles
(
NULL
),
pairParticles
(
NULL
)
{
IntegrateDrudeLangevinStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
)
{
}
}
~
OpenCLIntegrateDrudeLangevinStepKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -112,8 +110,8 @@ private:
...
@@ -112,8 +110,8 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
double
prevStepSize
;
double
prevStepSize
;
OpenCLArray
*
normalParticles
;
OpenCLArray
normalParticles
;
OpenCLArray
*
pairParticles
;
OpenCLArray
pairParticles
;
cl
::
Kernel
kernel1
,
kernel2
,
hardwallKernel
;
cl
::
Kernel
kernel1
,
kernel2
,
hardwallKernel
;
};
};
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
d9756688
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
...
@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
}
}
}
}
CudaIntegrateRPMDStepKernel
::~
CudaIntegrateRPMDStepKernel
()
{
if
(
forces
!=
NULL
)
delete
forces
;
if
(
positions
!=
NULL
)
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
CudaIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
void
CudaIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
numCopies
=
integrator
.
getNumCopies
();
...
@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
...
@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
int
paddedParticles
=
cu
.
getPaddedNumAtoms
();
int
paddedParticles
=
cu
.
getPaddedNumAtoms
();
bool
useDoublePrecision
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
());
bool
useDoublePrecision
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
());
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
double4
)
:
sizeof
(
float4
));
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
double4
)
:
sizeof
(
float4
));
forces
=
CudaArray
::
creat
e
<
long
long
>
(
cu
,
numCopies
*
paddedParticles
*
3
,
"rpmdForces"
);
forces
.
initializ
e
<
long
long
>
(
cu
,
numCopies
*
paddedParticles
*
3
,
"rpmdForces"
);
positions
=
new
CudaArray
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
positions
.
initialize
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
=
new
CudaArray
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
velocities
.
initialize
(
cu
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
vector
<
double4
>
temp
(
positions
->
getSize
());
vector
<
double4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
0
);
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
1
);
temp
[
i
]
=
make_double4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
}
else
{
else
{
vector
<
float4
>
temp
(
positions
->
getSize
());
vector
<
float4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
0
);
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
1
);
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
}
// Build a list of contractions.
// Build a list of contractions.
...
@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
...
@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
}
}
}
if
(
maxContractedCopies
>
0
)
{
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
CudaArray
::
creat
e
<
long
long
>
(
cu
,
maxContractedCopies
*
paddedParticles
*
3
,
"rpmdContractedForces"
);
contractedForces
.
initializ
e
<
long
long
>
(
cu
,
maxContractedCopies
*
paddedParticles
*
3
,
"rpmdContractedForces"
);
contractedPositions
=
new
CudaArray
(
cu
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
contractedPositions
.
initialize
(
cu
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
}
// Create kernels.
// Create kernels.
...
@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
...
@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
float
frictionFloat
=
(
float
)
friction
;
float
frictionFloat
=
(
float
)
friction
;
void
*
frictionPtr
=
(
useDoublePrecision
?
(
void
*
)
&
friction
:
(
void
*
)
&
frictionFloat
);
void
*
frictionPtr
=
(
useDoublePrecision
?
(
void
*
)
&
friction
:
(
void
*
)
&
frictionFloat
);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
numParticles
*
numCopies
);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
numParticles
*
numCopies
);
void
*
pileArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
,
dtPtr
,
kTPtr
,
frictionPtr
};
void
*
pileArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
,
dtPtr
,
kTPtr
,
frictionPtr
};
if
(
integrator
.
getApplyThermostat
())
if
(
integrator
.
getApplyThermostat
())
cu
.
executeKernel
(
pileKernel
,
pileArgs
,
numParticles
*
numCopies
,
workgroupSize
);
cu
.
executeKernel
(
pileKernel
,
pileArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Update positions and velocities.
// Update positions and velocities.
void
*
stepArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
dtPtr
,
kTPtr
};
void
*
stepArgs
[]
=
{
&
positions
.
getDevicePointer
(),
&
velocities
.
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
dtPtr
,
kTPtr
};
cu
.
executeKernel
(
stepKernel
,
stepArgs
,
numParticles
*
numCopies
,
workgroupSize
);
cu
.
executeKernel
(
stepKernel
,
stepArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Calculate forces based on the updated positions.
// Calculate forces based on the updated positions.
...
@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
...
@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// Update velocities.
// Update velocities.
void
*
velocitiesArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
dtPtr
};
void
*
velocitiesArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
dtPtr
};
cu
.
executeKernel
(
velocitiesKernel
,
velocitiesArgs
,
numParticles
*
numCopies
,
workgroupSize
);
cu
.
executeKernel
(
velocitiesKernel
,
velocitiesArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Apply the PILE-L thermostat again.
// Apply the PILE-L thermostat again.
...
@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
...
@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// the same translation to all the beads.
// the same translation to all the beads.
int
i
=
numCopies
-
1
;
int
i
=
numCopies
-
1
;
void
*
args
[]
=
{
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
void
*
args
[]
=
{
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
translateKernel
,
args
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
translateKernel
,
args
,
cu
.
getNumAtoms
());
}
}
}
}
...
@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...
@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
// Compute forces from all groups that didn't have a specified contraction.
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
context
.
computeVirtualSites
();
...
@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...
@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if
(
initialBox
[
0
]
!=
finalBox
[
0
]
||
initialBox
[
1
]
!=
finalBox
[
1
]
||
initialBox
[
2
]
!=
finalBox
[
2
])
if
(
initialBox
[
0
]
!=
finalBox
[
0
]
||
initialBox
[
1
]
!=
finalBox
[
1
]
||
initialBox
[
2
]
!=
finalBox
[
2
])
throw
OpenMMException
(
"Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead."
);
throw
OpenMMException
(
"Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead."
);
context
.
calcForcesAndEnergy
(
true
,
false
,
groupsNotContracted
);
context
.
calcForcesAndEnergy
(
true
,
false
,
groupsNotContracted
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
&
velocities
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
}
...
@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...
@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Find the contracted positions.
// Find the contracted positions.
void
*
contractPosArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
()};
void
*
contractPosArgs
[]
=
{
&
positions
.
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
()};
cu
.
executeKernel
(
positionContractionKernels
[
copies
],
contractPosArgs
,
numParticles
*
numCopies
,
workgroupSize
);
cu
.
executeKernel
(
positionContractionKernels
[
copies
],
contractPosArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Compute forces.
// Compute forces.
for
(
int
i
=
0
;
i
<
copies
;
i
++
)
{
for
(
int
i
=
0
;
i
<
copies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
context
.
computeVirtualSites
();
context
.
calcForcesAndEnergy
(
true
,
false
,
groupFlags
);
context
.
calcForcesAndEnergy
(
true
,
false
,
groupFlags
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
contractedForces
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
&
velocities
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
contractedPositions
.
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
}
// Apply the forces to the original copies.
// Apply the forces to the original copies.
void
*
contractForceArgs
[]
=
{
&
forces
->
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
()};
void
*
contractForceArgs
[]
=
{
&
forces
.
getDevicePointer
(),
&
contractedForces
.
getDevicePointer
()};
cu
.
executeKernel
(
forceContractionKernels
[
copies
],
contractForceArgs
,
numParticles
*
numCopies
,
workgroupSize
);
cu
.
executeKernel
(
forceContractionKernels
[
copies
],
contractForceArgs
,
numParticles
*
numCopies
,
workgroupSize
);
}
}
if
(
groupsByCopies
.
size
()
>
0
)
{
if
(
groupsByCopies
.
size
()
>
0
)
{
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
int
i
=
numCopies
-
1
;
int
i
=
numCopies
-
1
;
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyToContextArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
}
}
...
@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
...
@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
}
}
void
CudaIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
void
CudaIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
if
(
!
positions
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
...
@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
...
@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
cu
.
getPosq
().
download
(
posq
);
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
}
}
else
if
(
cu
.
getUseMixedPrecision
())
{
else
if
(
cu
.
getUseMixedPrecision
())
{
vector
<
float4
>
posqf
(
cu
.
getPaddedNumAtoms
());
vector
<
float4
>
posqf
(
cu
.
getPaddedNumAtoms
());
...
@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
...
@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
vector
<
double4
>
posq
(
cu
.
getPaddedNumAtoms
());
vector
<
double4
>
posq
(
cu
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
posq
[
i
]
=
make_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
posq
[
0
],
numParticles
*
sizeof
(
double4
));
}
}
else
{
else
{
vector
<
float4
>
posq
(
cu
.
getPaddedNumAtoms
());
vector
<
float4
>
posq
(
cu
.
getPaddedNumAtoms
());
cu
.
getPosq
().
download
(
posq
);
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_float4
((
float
)
offsetPos
[
i
][
0
],
(
float
)
offsetPos
[
i
][
1
],
(
float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
posq
[
i
]
=
make_float4
((
float
)
offsetPos
[
i
][
0
],
(
float
)
offsetPos
[
i
][
1
],
(
float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
result
=
cuMemcpyHtoD
(
positions
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
posq
[
0
],
numParticles
*
sizeof
(
float4
));
result
=
cuMemcpyHtoD
(
positions
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
posq
[
0
],
numParticles
*
sizeof
(
float4
));
}
}
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
positions
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error uploading array "
<<
positions
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
void
CudaIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
void
CudaIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
if
(
!
velocities
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
...
@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
...
@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
cu
.
getVelm
().
download
(
velm
);
cu
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
make_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
velm
[
i
]
=
make_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
result
=
cuMemcpyHtoD
(
velocities
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
velm
[
0
],
numParticles
*
sizeof
(
double4
));
result
=
cuMemcpyHtoD
(
velocities
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
double4
),
&
velm
[
0
],
numParticles
*
sizeof
(
double4
));
}
}
else
{
else
{
vector
<
float4
>
velm
(
cu
.
getPaddedNumAtoms
());
vector
<
float4
>
velm
(
cu
.
getPaddedNumAtoms
());
cu
.
getVelm
().
download
(
velm
);
cu
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
make_float4
((
float
)
vel
[
i
][
0
],
(
float
)
vel
[
i
][
1
],
(
float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
velm
[
i
]
=
make_float4
((
float
)
vel
[
i
][
0
],
(
float
)
vel
[
i
][
1
],
(
float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
result
=
cuMemcpyHtoD
(
velocities
->
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
velm
[
0
],
numParticles
*
sizeof
(
float4
));
result
=
cuMemcpyHtoD
(
velocities
.
getDevicePointer
()
+
copy
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
float4
),
&
velm
[
0
],
numParticles
*
sizeof
(
float4
));
}
}
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
velocities
->
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
str
<<
"Error uploading array "
<<
velocities
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
void
CudaIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
void
CudaIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
void
*
copyArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
void
*
copyArgs
[]
=
{
&
velocities
.
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
.
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
copy
};
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
copy
};
cu
.
executeKernel
(
copyToContextKernel
,
copyArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
copyToContextKernel
,
copyArgs
,
cu
.
getNumAtoms
());
}
}
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
View file @
d9756688
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -46,9 +46,8 @@ namespace OpenMM {
...
@@ -46,9 +46,8 @@ namespace OpenMM {
class
CudaIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
class
CudaIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
public:
CudaIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CudaIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
)
,
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
),
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
)
{
}
}
~
CudaIntegrateRPMDStepKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -91,11 +90,11 @@ private:
...
@@ -91,11 +90,11 @@ private:
int
numCopies
,
numParticles
,
workgroupSize
;
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
int
groupsNotContracted
;
CudaArray
*
forces
;
CudaArray
forces
;
CudaArray
*
positions
;
CudaArray
positions
;
CudaArray
*
velocities
;
CudaArray
velocities
;
CudaArray
*
contractedForces
;
CudaArray
contractedForces
;
CudaArray
*
contractedPositions
;
CudaArray
contractedPositions
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
CUfunction
>
positionContractionKernels
;
std
::
map
<
int
,
CUfunction
>
positionContractionKernels
;
std
::
map
<
int
,
CUfunction
>
forceContractionKernels
;
std
::
map
<
int
,
CUfunction
>
forceContractionKernels
;
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
d9756688
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -41,19 +41,6 @@
...
@@ -41,19 +41,6 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
OpenCLIntegrateRPMDStepKernel
::~
OpenCLIntegrateRPMDStepKernel
()
{
if
(
forces
!=
NULL
)
delete
forces
;
if
(
positions
!=
NULL
)
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
OpenCLIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
void
OpenCLIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
cl
.
getPlatformData
().
initializeContexts
(
system
);
cl
.
getPlatformData
().
initializeContexts
(
system
);
numCopies
=
integrator
.
getNumCopies
();
numCopies
=
integrator
.
getNumCopies
();
...
@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
...
@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw
OpenMMException
(
"RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."
);
throw
OpenMMException
(
"RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."
);
int
paddedParticles
=
cl
.
getPaddedNumAtoms
();
int
paddedParticles
=
cl
.
getPaddedNumAtoms
();
int
forceElementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
int
forceElementSize
=
(
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
forces
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
forces
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
forceElementSize
,
"rpmdForces"
);
bool
useDoublePrecision
=
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
());
bool
useDoublePrecision
=
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
());
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
int
elementSize
=
(
useDoublePrecision
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
));
positions
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
positions
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdPositions"
);
velocities
=
new
OpenCLArray
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
velocities
.
initialize
(
cl
,
numCopies
*
paddedParticles
,
elementSize
,
"rpmdVelocities"
);
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
cl
.
getIntegrationUtilities
().
initRandomNumberGenerator
((
unsigned
int
)
integrator
.
getRandomNumberSeed
());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
temp
(
positions
->
getSize
());
vector
<
mm_double4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
0
);
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
1
);
temp
[
i
]
=
mm_double4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
}
else
{
else
{
vector
<
mm_float4
>
temp
(
positions
->
getSize
());
vector
<
mm_float4
>
temp
(
positions
.
getSize
());
for
(
int
i
=
0
;
i
<
positions
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
positions
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
0
);
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
0
);
positions
->
upload
(
temp
);
positions
.
upload
(
temp
);
for
(
int
i
=
0
;
i
<
velocities
->
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
velocities
.
getSize
();
i
++
)
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
1
);
temp
[
i
]
=
mm_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
velocities
.
upload
(
temp
);
}
}
// Build a list of contractions.
// Build a list of contractions.
...
@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
...
@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
}
}
}
if
(
maxContractedCopies
>
0
)
{
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedForces
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
forceElementSize
,
"rpmdContractedForces"
);
contractedPositions
=
new
OpenCLArray
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
contractedPositions
.
initialize
(
cl
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
}
// Create kernels.
// Create kernels.
...
@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
...
@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void
OpenCLIntegrateRPMDStepKernel
::
initializeKernels
(
ContextImpl
&
context
)
{
void
OpenCLIntegrateRPMDStepKernel
::
initializeKernels
(
ContextImpl
&
context
)
{
hasInitializedKernel
=
true
;
hasInitializedKernel
=
true
;
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
pileKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
velocities
.
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
->
getDeviceBuffer
());
stepKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
forces
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
velocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
translateKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
velocities
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getVelm
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getVelm
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getPosq
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getPosq
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForce
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForce
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
velocities
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getAtomIndexArray
().
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
int
copies
=
g
.
first
;
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
positions
.
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
->
getDeviceBuffer
());
positionContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedPositions
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
0
,
forces
.
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
forceContractionKernels
[
copies
].
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
}
}
}
}
...
@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
...
@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void
OpenCLIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
void
OpenCLIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
// Compute forces from all groups that didn't have a specified contraction.
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
positions
.
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
i
);
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
i
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
...
@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...
@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them.
// Now loop over contractions and compute forces from them.
if
(
groupsByCopies
.
size
()
>
0
)
{
if
(
groupsByCopies
.
size
()
>
0
)
{
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
contractedPositions
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
contractedForces
.
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
->
getDeviceBuffer
());
copyFromContextKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
contractedPositions
.
getDeviceBuffer
());
for
(
auto
&
g
:
groupsByCopies
)
{
for
(
auto
&
g
:
groupsByCopies
)
{
int
copies
=
g
.
first
;
int
copies
=
g
.
first
;
int
groupFlags
=
g
.
second
;
int
groupFlags
=
g
.
second
;
...
@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
...
@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if
(
groupsByCopies
.
size
()
>
0
)
{
if
(
groupsByCopies
.
size
()
>
0
)
{
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
numCopies
-
1
);
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
numCopies
-
1
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
}
...
@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
...
@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
}
}
void
OpenCLIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
void
OpenCLIntegrateRPMDStepKernel
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
pos
)
{
if
(
positions
==
NULL
)
if
(
!
positions
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
throw
OpenMMException
(
"RPMDIntegrator: Cannot set positions before the integrator is added to a Context"
);
if
(
pos
.
size
()
!=
numParticles
)
if
(
pos
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setPositions()"
);
...
@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
...
@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl
.
getPosq
().
download
(
posq
);
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
}
else
if
(
cl
.
getUseMixedPrecision
())
{
else
if
(
cl
.
getUseMixedPrecision
())
{
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float4
>
posqf
(
cl
.
getPaddedNumAtoms
());
...
@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
...
@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector
<
mm_double4
>
posq
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_double4
>
posq
(
cl
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
posq
[
i
]
=
mm_double4
(
offsetPos
[
i
][
0
],
offsetPos
[
i
][
1
],
offsetPos
[
i
][
2
],
posqf
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
posq
[
0
]);
}
}
else
{
else
{
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float4
>
posq
(
cl
.
getPaddedNumAtoms
());
cl
.
getPosq
().
download
(
posq
);
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
mm_float4
((
cl_float
)
offsetPos
[
i
][
0
],
(
cl_float
)
offsetPos
[
i
][
1
],
(
cl_float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
posq
[
i
]
=
mm_float4
((
cl_float
)
offsetPos
[
i
][
0
],
(
cl_float
)
offsetPos
[
i
][
1
],
(
cl_float
)
offsetPos
[
i
][
2
],
posq
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
positions
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
posq
[
0
]);
}
}
}
}
void
OpenCLIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
void
OpenCLIntegrateRPMDStepKernel
::
setVelocities
(
int
copy
,
const
vector
<
Vec3
>&
vel
)
{
if
(
velocities
==
NULL
)
if
(
!
velocities
.
isInitialized
()
)
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
throw
OpenMMException
(
"RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"
);
if
(
vel
.
size
()
!=
numParticles
)
if
(
vel
.
size
()
!=
numParticles
)
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
throw
OpenMMException
(
"RPMDIntegrator: wrong number of values passed to setVelocities()"
);
...
@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
...
@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl
.
getVelm
().
download
(
velm
);
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
velm
[
i
]
=
mm_double4
(
vel
[
i
][
0
],
vel
[
i
][
1
],
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_double4
),
numParticles
*
sizeof
(
mm_double4
),
&
velm
[
0
]);
}
}
else
{
else
{
vector
<
mm_float4
>
velm
(
cl
.
getPaddedNumAtoms
());
vector
<
mm_float4
>
velm
(
cl
.
getPaddedNumAtoms
());
cl
.
getVelm
().
download
(
velm
);
cl
.
getVelm
().
download
(
velm
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
velm
[
i
]
=
mm_float4
((
cl_float
)
vel
[
i
][
0
],
(
cl_float
)
vel
[
i
][
1
],
(
cl_float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
velm
[
i
]
=
mm_float4
((
cl_float
)
vel
[
i
][
0
],
(
cl_float
)
vel
[
i
][
1
],
(
cl_float
)
vel
[
i
][
2
],
velm
[
i
].
w
);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
->
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
cl
.
getQueue
().
enqueueWriteBuffer
(
velocities
.
getDeviceBuffer
(),
CL_TRUE
,
copy
*
cl
.
getPaddedNumAtoms
()
*
sizeof
(
mm_float4
),
numParticles
*
sizeof
(
mm_float4
),
&
velm
[
0
]);
}
}
}
}
void
OpenCLIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
void
OpenCLIntegrateRPMDStepKernel
::
copyToContext
(
int
copy
,
ContextImpl
&
context
)
{
if
(
!
hasInitializedKernel
)
if
(
!
hasInitializedKernel
)
initializeKernels
(
context
);
initializeKernels
(
context
);
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
->
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
positions
.
getDeviceBuffer
());
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
copy
);
copyToContextKernel
.
setArg
<
cl_int
>
(
5
,
copy
);
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
copyToContextKernel
,
cl
.
getNumAtoms
());
}
}
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
View file @
d9756688
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -45,9 +45,8 @@ namespace OpenMM {
...
@@ -45,9 +45,8 @@ namespace OpenMM {
class
OpenCLIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
class
OpenCLIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
public:
OpenCLIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
OpenCLIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
,
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
),
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernel
(
false
)
{
}
}
~
OpenCLIntegrateRPMDStepKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -92,11 +91,11 @@ private:
...
@@ -92,11 +91,11 @@ private:
int
numCopies
,
numParticles
,
workgroupSize
;
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
int
groupsNotContracted
;
OpenCLArray
*
forces
;
OpenCLArray
forces
;
OpenCLArray
*
positions
;
OpenCLArray
positions
;
OpenCLArray
*
velocities
;
OpenCLArray
velocities
;
OpenCLArray
*
contractedForces
;
OpenCLArray
contractedForces
;
OpenCLArray
*
contractedPositions
;
OpenCLArray
contractedPositions
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
cl
::
Kernel
>
positionContractionKernels
;
std
::
map
<
int
,
cl
::
Kernel
>
positionContractionKernels
;
std
::
map
<
int
,
cl
::
Kernel
>
forceContractionKernels
;
std
::
map
<
int
,
cl
::
Kernel
>
forceContractionKernels
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment