Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c589f1cc
Commit
c589f1cc
authored
May 28, 2013
by
Yutong Zhao
Browse files
Merge pull request #11 from peastman/master
Improved infrastructure for integrators
parents
6943ef5b
18f78c1e
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
180 additions
and
295 deletions
+180
-295
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+23
-27
platforms/cuda/src/CudaContext.h
platforms/cuda/src/CudaContext.h
+17
-12
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+18
-44
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+0
-6
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+23
-27
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+17
-12
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+13
-38
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+0
-6
plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+25
-52
plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+0
-2
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
+26
-53
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+0
-2
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+10
-7
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+8
-7
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
c589f1cc
...
...
@@ -67,7 +67,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
!
hasInitializedCuda
)
{
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
...
...
@@ -282,7 +282,6 @@ void CudaContext::initialize() {
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
findMoleculeGroups
();
moleculesInvalid
=
false
;
nonbonded
->
initialize
(
system
);
}
...
...
@@ -821,11 +820,6 @@ void CudaContext::findMoleculeGroups() {
}
void
CudaContext
::
invalidateMolecules
()
{
moleculesInvalid
=
true
;
}
void
CudaContext
::
validateMolecules
()
{
moleculesInvalid
=
false
;
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
return
;
bool
valid
=
true
;
...
...
@@ -925,24 +919,28 @@ void CudaContext::validateMolecules() {
findMoleculeGroups
();
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
reorderListeners
[
i
]
->
execute
();
reorderAtoms
();
}
void
CudaContext
::
reorderAtoms
(
bool
enforcePeriodic
)
{
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
void
CudaContext
::
reorderAtoms
()
{
atomsWereReordered
=
false
;
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
()
||
stepsSinceReorder
<
100
)
{
stepsSinceReorder
++
;
return
;
if
(
moleculesInvalid
)
validateMolecules
();
}
atomsWereReordered
=
true
;
stepsSinceReorder
=
0
;
if
(
useDoublePrecision
)
reorderAtomsImpl
<
double
,
double4
,
double
,
double4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
double
,
double4
,
double
,
double4
>
();
else
if
(
useMixedPrecision
)
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
else
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
nonbonded
->
updateNeighborListSize
();
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
CudaContext
::
reorderAtomsImpl
(
bool
enforcePeriodic
)
{
void
CudaContext
::
reorderAtomsImpl
()
{
// Find the range of positions and the number of bins along each axis.
Real4
padding
=
{
0
,
0
,
0
,
0
};
...
...
@@ -1019,18 +1017,16 @@ void CudaContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
molPos
[
i
].
z
-=
dz
;
if
(
enforcePeriodic
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
oldPosq
[
atom
]
=
p
;
posCellOffsets
[
atom
].
x
-=
xcell
;
posCellOffsets
[
atom
].
y
-=
ycell
;
posCellOffsets
[
atom
].
z
-=
zcell
;
}
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
oldPosq
[
atom
]
=
p
;
posCellOffsets
[
atom
].
x
-=
xcell
;
posCellOffsets
[
atom
].
y
-=
ycell
;
posCellOffsets
[
atom
].
z
-=
zcell
;
}
}
}
...
...
platforms/cuda/src/CudaContext.h
View file @
c589f1cc
...
...
@@ -298,6 +298,18 @@ public:
void
setComputeForceCount
(
int
count
)
{
computeForceCount
=
count
;
}
/**
* Get the number of time steps since the atoms were reordered.
*/
int
getStepsSinceReorder
()
const
{
return
stepsSinceReorder
;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
}
/**
* Get the number of atoms.
*/
...
...
@@ -429,10 +441,8 @@ public:
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/
void
reorderAtoms
(
bool
enforcePeriodic
);
void
reorderAtoms
();
/**
* Add a listener that should be called whenever atoms get reordered. The CudaContext
* assumes ownership of the object, and deletes it when the context itself is deleted.
...
...
@@ -447,15 +457,9 @@ public:
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated
the next to reorderAtoms() is called
.
* and order to be revalidated.
*/
void
invalidateMolecules
();
/**
* Get whether the current molecule definitions are valid.
*/
bool
getMoleculesAreInvalid
()
{
return
moleculesInvalid
;
}
private:
struct
Molecule
;
struct
MoleculeGroup
;
...
...
@@ -472,7 +476,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
reorderAtomsImpl
(
bool
enforcePeriodic
);
void
reorderAtomsImpl
();
static
bool
hasInitializedCuda
;
const
System
&
system
;
double
time
,
computeCapability
;
...
...
@@ -481,11 +485,12 @@ private:
int
contextIndex
;
int
stepCount
;
int
computeForceCount
;
int
stepsSinceReorder
;
int
numAtoms
;
int
paddedNumAtoms
;
int
numAtomBlocks
;
int
numThreadBlocks
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
,
moleculesInvalid
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
;
std
::
string
compiler
,
tempDir
,
gpuArchitecture
;
float4
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
double4
periodicBoxSize
,
invPeriodicBoxSize
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
c589f1cc
...
...
@@ -86,11 +86,6 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
cu
.
setAsCurrent
();
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cu
.
setAtomsWereReordered
(
false
);
if
(
nb
.
getUseCutoff
()
&&
includeNonbonded
&&
(
cu
.
getMoleculesAreInvalid
()
||
cu
.
getComputeForceCount
()
%
100
==
0
))
{
cu
.
reorderAtoms
(
!
cu
.
getMoleculesAreInvalid
());
nb
.
updateNeighborListSize
();
}
cu
.
setComputeForceCount
(
cu
.
getComputeForceCount
()
+
1
);
cu
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
...
...
@@ -220,6 +215,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<
}
for
(
int
i
=
0
;
i
<
(
int
)
cu
.
getPosCellOffsets
().
size
();
i
++
)
cu
.
getPosCellOffsets
()[
i
]
=
make_int4
(
0
,
0
,
0
,
0
);
cu
.
reorderAtoms
();
}
void
CudaUpdateStateDataKernel
::
getVelocities
(
ContextImpl
&
context
,
vector
<
Vec3
>&
velocities
)
{
...
...
@@ -317,8 +313,8 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream&
stream
.
write
((
char
*
)
&
time
,
sizeof
(
double
));
int
stepCount
=
cu
.
getStepCount
();
stream
.
write
((
char
*
)
&
stepCount
,
sizeof
(
int
));
int
computeForceCount
=
cu
.
getComputeForceCount
();
stream
.
write
((
char
*
)
&
computeForceCount
,
sizeof
(
int
));
int
stepsSinceReorder
=
cu
.
getStepsSinceReorder
();
stream
.
write
((
char
*
)
&
stepsSinceReorder
,
sizeof
(
int
));
char
*
buffer
=
(
char
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
buffer
);
stream
.
write
(
buffer
,
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
());
...
...
@@ -349,14 +345,14 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st
throw
OpenMMException
(
"Checkpoint was created with a different numeric precision"
);
double
time
;
stream
.
read
((
char
*
)
&
time
,
sizeof
(
double
));
int
stepCount
,
computeForceCount
;
int
stepCount
,
stepsSinceReorder
;
stream
.
read
((
char
*
)
&
stepCount
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
computeForceCount
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
stepsSinceReorder
,
sizeof
(
int
));
vector
<
CudaContext
*>&
contexts
=
cu
.
getPlatformData
().
contexts
;
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
{
contexts
[
i
]
->
setTime
(
time
);
contexts
[
i
]
->
setStepCount
(
stepCount
);
contexts
[
i
]
->
set
ComputeForceCount
(
computeForceCount
);
contexts
[
i
]
->
set
StepsSinceReorder
(
stepsSinceReorder
);
}
char
*
buffer
=
(
char
*
)
cu
.
getPinnedBuffer
();
stream
.
read
(
buffer
,
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
());
...
...
@@ -4134,6 +4130,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu
.
setTime
(
cu
.
getTime
()
+
dt
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateVerletStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
VerletIntegrator
&
integrator
)
{
...
...
@@ -4221,6 +4218,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
cu
.
setTime
(
cu
.
getTime
()
+
stepSize
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateLangevinStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
...
...
@@ -4283,6 +4281,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
cu
.
setTime
(
cu
.
getTime
()
+
stepSize
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateBrownianStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
BrownianIntegrator
&
integrator
)
{
...
...
@@ -4363,6 +4362,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
return
dt
;
}
...
...
@@ -4457,6 +4457,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
return
dt
;
}
...
...
@@ -5129,6 +5130,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
setTime
(
cu
.
getTime
()
+
integrator
.
getStepSize
());
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateCustomStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
...
...
@@ -5359,40 +5361,12 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
void
CudaApplyMonteCarloBarostatKernel
::
restoreCoordinates
(
ContextImpl
&
context
)
{
cu
.
setAsCurrent
();
if
(
cu
.
getAtomsWereReordered
())
{
// The atoms were reordered since we saved the positions, so we need to fix them.
const
vector
<
int
>
atomOrder
=
cu
.
getAtomIndex
();
int
numAtoms
=
cu
.
getNumAtoms
();
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
pos
=
(
double4
*
)
cu
.
getPinnedBuffer
();
savedPositions
->
download
(
pos
);
vector
<
double4
>
fixedPos
(
cu
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
fixedPos
[
lastAtomOrder
[
i
]]
=
pos
[
i
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
pos
[
i
]
=
fixedPos
[
atomOrder
[
i
]];
cu
.
getPosq
().
upload
(
pos
);
}
else
{
float4
*
pos
=
(
float4
*
)
cu
.
getPinnedBuffer
();
savedPositions
->
download
(
pos
);
vector
<
float4
>
fixedPos
(
cu
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
fixedPos
[
lastAtomOrder
[
i
]]
=
pos
[
i
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
pos
[
i
]
=
fixedPos
[
atomOrder
[
i
]];
cu
.
getPosq
().
upload
(
pos
);
}
}
else
{
int
bytesToCopy
=
cu
.
getPosq
().
getSize
()
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
));
CUresult
result
=
cuMemcpyDtoD
(
cu
.
getPosq
().
getDevicePointer
(),
savedPositions
->
getDevicePointer
(),
bytesToCopy
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
m
;
m
<<
"Error restoring positions for MC barostat: "
<<
cu
.
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
m
.
str
());
}
int
bytesToCopy
=
cu
.
getPosq
().
getSize
()
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
));
CUresult
result
=
cuMemcpyDtoD
(
cu
.
getPosq
().
getDevicePointer
(),
savedPositions
->
getDevicePointer
(),
bytesToCopy
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
m
;
m
<<
"Error restoring positions for MC barostat: "
<<
cu
.
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
m
.
str
());
}
}
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
c589f1cc
...
...
@@ -739,12 +739,6 @@ void testChangingParameters() {
nonbonded
->
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
nonbonded
->
setParticleParameters
(
i
,
1.5
*
charge
,
1.1
*
sigma
,
1.7
*
epsilon
);
}
double
total
=
0
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
sigma
,
epsilon
;
nonbonded
->
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
total
+=
charge
;
}
nonbonded
->
updateParametersInContext
(
cuContext
);
nonbonded
->
updateParametersInContext
(
referenceContext
);
cuState
=
cuContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
c589f1cc
...
...
@@ -66,7 +66,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
...
...
@@ -407,7 +407,6 @@ void OpenCLContext::initialize() {
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
findMoleculeGroups
();
moleculesInvalid
=
false
;
nonbonded
->
initialize
(
system
);
}
...
...
@@ -826,11 +825,6 @@ void OpenCLContext::findMoleculeGroups() {
}
void
OpenCLContext
::
invalidateMolecules
()
{
moleculesInvalid
=
true
;
}
void
OpenCLContext
::
validateMolecules
()
{
moleculesInvalid
=
false
;
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
return
;
bool
valid
=
true
;
...
...
@@ -930,24 +924,28 @@ void OpenCLContext::validateMolecules() {
findMoleculeGroups
();
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
reorderListeners
[
i
]
->
execute
();
reorderAtoms
();
}
void
OpenCLContext
::
reorderAtoms
(
bool
enforcePeriodic
)
{
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
())
void
OpenCLContext
::
reorderAtoms
()
{
atomsWereReordered
=
false
;
if
(
numAtoms
==
0
||
nonbonded
==
NULL
||
!
nonbonded
->
getUseCutoff
()
||
stepsSinceReorder
<
100
)
{
stepsSinceReorder
++
;
return
;
if
(
moleculesInvalid
)
validateMolecules
();
}
atomsWereReordered
=
true
;
stepsSinceReorder
=
0
;
if
(
useDoublePrecision
)
reorderAtomsImpl
<
cl_double
,
mm_double4
,
cl_double
,
mm_double4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
cl_double
,
mm_double4
,
cl_double
,
mm_double4
>
();
else
if
(
useMixedPrecision
)
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
(
enforcePeriodic
);
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
nonbonded
->
updateNeighborListSize
();
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
OpenCLContext
::
reorderAtomsImpl
(
bool
enforcePeriodic
)
{
void
OpenCLContext
::
reorderAtomsImpl
()
{
// Find the range of positions and the number of bins along each axis.
...
...
@@ -1023,18 +1021,16 @@ void OpenCLContext::reorderAtomsImpl(bool enforcePeriodic) {
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
molPos
[
i
].
z
-=
dz
;
if
(
enforcePeriodic
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
oldPosq
[
atom
]
=
p
;
posCellOffsets
[
atom
].
x
-=
xcell
;
posCellOffsets
[
atom
].
y
-=
ycell
;
posCellOffsets
[
atom
].
z
-=
zcell
;
}
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
oldPosq
[
atom
]
=
p
;
posCellOffsets
[
atom
].
x
-=
xcell
;
posCellOffsets
[
atom
].
y
-=
ycell
;
posCellOffsets
[
atom
].
z
-=
zcell
;
}
}
}
...
...
platforms/opencl/src/OpenCLContext.h
View file @
c589f1cc
...
...
@@ -381,6 +381,18 @@ public:
void
setComputeForceCount
(
int
count
)
{
computeForceCount
=
count
;
}
/**
* Get the number of time steps since the atoms were reordered.
*/
int
getStepsSinceReorder
()
const
{
return
stepsSinceReorder
;
}
/**
* Set the number of time steps since the atoms were reordered.
*/
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
}
/**
* Get the number of atoms.
*/
...
...
@@ -529,10 +541,8 @@ public:
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*
* @param enforcePeriodic if true, the atom positions may be altered to enforce periodic boundary conditions
*/
void
reorderAtoms
(
bool
enforcePeriodic
);
void
reorderAtoms
();
/**
* Add a listener that should be called whenever atoms get reordered. The OpenCLContext
* assumes ownership of the object, and deletes it when the context itself is deleted.
...
...
@@ -547,15 +557,9 @@ public:
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
* and order to be revalidated
the next to reorderAtoms() is called
.
* and order to be revalidated.
*/
void
invalidateMolecules
();
/**
* Get whether the current molecule definitions are valid.
*/
bool
getMoleculesAreInvalid
()
{
return
moleculesInvalid
;
}
private:
struct
Molecule
;
struct
MoleculeGroup
;
...
...
@@ -572,7 +576,7 @@ private:
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
reorderAtomsImpl
(
bool
enforcePeriodic
);
void
reorderAtomsImpl
();
const
System
&
system
;
double
time
;
OpenCLPlatform
::
PlatformData
&
platformData
;
...
...
@@ -580,13 +584,14 @@ private:
int
contextIndex
;
int
stepCount
;
int
computeForceCount
;
int
stepsSinceReorder
;
int
numAtoms
;
int
paddedNumAtoms
;
int
numAtomBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
moleculesInvalid
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
;
std
::
string
defaultOptimizationOptions
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c589f1cc
...
...
@@ -106,11 +106,6 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
void
OpenCLCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cl
.
setAtomsWereReordered
(
false
);
if
(
nb
.
getUseCutoff
()
&&
includeNonbonded
&&
(
cl
.
getMoleculesAreInvalid
()
||
cl
.
getComputeForceCount
()
%
100
==
0
))
{
cl
.
reorderAtoms
(
!
cl
.
getMoleculesAreInvalid
());
nb
.
updateNeighborListSize
();
}
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clearAutoclearBuffers
();
if
(
includeNonbonded
)
...
...
@@ -239,6 +234,7 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto
}
for
(
int
i
=
0
;
i
<
(
int
)
cl
.
getPosCellOffsets
().
size
();
i
++
)
cl
.
getPosCellOffsets
()[
i
]
=
mm_int4
(
0
,
0
,
0
,
0
);
cl
.
reorderAtoms
();
}
void
OpenCLUpdateStateDataKernel
::
getVelocities
(
ContextImpl
&
context
,
vector
<
Vec3
>&
velocities
)
{
...
...
@@ -342,8 +338,8 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream
.
write
((
char
*
)
&
time
,
sizeof
(
double
));
int
stepCount
=
cl
.
getStepCount
();
stream
.
write
((
char
*
)
&
stepCount
,
sizeof
(
int
));
int
computeForceCount
=
cl
.
getComputeForceCount
();
stream
.
write
((
char
*
)
&
computeForceCount
,
sizeof
(
int
));
int
stepsSinceReorder
=
cl
.
getStepsSinceReorder
();
stream
.
write
((
char
*
)
&
stepsSinceReorder
,
sizeof
(
int
));
char
*
buffer
=
(
char
*
)
cl
.
getPinnedBuffer
();
cl
.
getPosq
().
download
(
buffer
);
stream
.
write
(
buffer
,
cl
.
getPosq
().
getSize
()
*
cl
.
getPosq
().
getElementSize
());
...
...
@@ -373,14 +369,14 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
throw
OpenMMException
(
"Checkpoint was created with a different numeric precision"
);
double
time
;
stream
.
read
((
char
*
)
&
time
,
sizeof
(
double
));
int
stepCount
,
computeForceCount
;
int
stepCount
,
stepsSinceReorder
;
stream
.
read
((
char
*
)
&
stepCount
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
computeForceCount
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
stepsSinceReorder
,
sizeof
(
int
));
vector
<
OpenCLContext
*>&
contexts
=
cl
.
getPlatformData
().
contexts
;
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
{
contexts
[
i
]
->
setTime
(
time
);
contexts
[
i
]
->
setStepCount
(
stepCount
);
contexts
[
i
]
->
set
ComputeForceCount
(
computeForceCount
);
contexts
[
i
]
->
set
StepsSinceReorder
(
stepsSinceReorder
);
}
char
*
buffer
=
(
char
*
)
cl
.
getPinnedBuffer
();
stream
.
read
(
buffer
,
cl
.
getPosq
().
getSize
()
*
cl
.
getPosq
().
getElementSize
());
...
...
@@ -4296,6 +4292,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
cl
.
setTime
(
cl
.
getTime
()
+
dt
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
// Reduce UI lag.
...
...
@@ -4395,6 +4392,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
cl
.
setTime
(
cl
.
getTime
()
+
stepSize
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
// Reduce UI lag.
...
...
@@ -4473,6 +4471,7 @@ void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const Brow
cl
.
setTime
(
cl
.
getTime
()
+
stepSize
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
// Reduce UI lag.
...
...
@@ -4578,6 +4577,7 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co
}
cl
.
setTime
(
time
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
return
dt
;
}
...
...
@@ -4691,6 +4691,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
}
cl
.
setTime
(
time
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
return
dt
;
}
...
...
@@ -5354,6 +5355,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl
.
setTime
(
cl
.
getTime
()
+
integrator
.
getStepSize
());
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
// Reduce UI lag.
...
...
@@ -5580,34 +5582,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
}
void
OpenCLApplyMonteCarloBarostatKernel
::
restoreCoordinates
(
ContextImpl
&
context
)
{
if
(
cl
.
getAtomsWereReordered
())
{
// The atoms were reordered since we saved the positions, so we need to fix them.
const
vector
<
int
>
atomOrder
=
cl
.
getAtomIndex
();
int
numAtoms
=
cl
.
getNumAtoms
();
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
pos
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
savedPositions
->
download
(
pos
);
vector
<
mm_double4
>
fixedPos
(
cl
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
fixedPos
[
lastAtomOrder
[
i
]]
=
pos
[
i
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
pos
[
i
]
=
fixedPos
[
atomOrder
[
i
]];
cl
.
getPosq
().
upload
(
pos
);
}
else
{
mm_float4
*
pos
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
savedPositions
->
download
(
pos
);
vector
<
mm_float4
>
fixedPos
(
cl
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
fixedPos
[
lastAtomOrder
[
i
]]
=
pos
[
i
];
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
pos
[
i
]
=
fixedPos
[
atomOrder
[
i
]];
cl
.
getPosq
().
upload
(
pos
);
}
}
else
cl
.
getQueue
().
enqueueCopyBuffer
(
savedPositions
->
getDeviceBuffer
(),
cl
.
getPosq
().
getDeviceBuffer
(),
0
,
0
,
cl
.
getPosq
().
getSize
()
*
sizeof
(
mm_float4
));
cl
.
getQueue
().
enqueueCopyBuffer
(
savedPositions
->
getDeviceBuffer
(),
cl
.
getPosq
().
getDeviceBuffer
(),
0
,
0
,
cl
.
getPosq
().
getSize
()
*
sizeof
(
mm_float4
));
}
OpenCLRemoveCMMotionKernel
::~
OpenCLRemoveCMMotionKernel
()
{
...
...
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
View file @
c589f1cc
...
...
@@ -742,12 +742,6 @@ void testChangingParameters() {
nonbonded
->
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
nonbonded
->
setParticleParameters
(
i
,
1.5
*
charge
,
1.1
*
sigma
,
1.7
*
epsilon
);
}
double
total
=
0
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
sigma
,
epsilon
;
nonbonded
->
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
total
+=
charge
;
}
nonbonded
->
updateParametersInContext
(
clContext
);
nonbonded
->
updateParametersInContext
(
referenceContext
);
clState
=
clContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
...
...
plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
View file @
c589f1cc
...
...
@@ -358,33 +358,13 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
cu
.
setTime
(
cu
.
getTime
()
+
stepSize
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateDrudeLangevinStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
DrudeLangevinIntegrator
&
integrator
)
{
return
cu
.
getIntegrationUtilities
().
computeKineticEnergy
(
0.5
*
integrator
.
getStepSize
());
}
class
CudaIntegrateDrudeSCFStepKernel
::
ReorderListener
:
public
CudaContext
::
ReorderListener
{
public:
ReorderListener
(
CudaContext
&
cu
,
const
vector
<
int
>&
drudeParticles
,
vector
<
int
>&
reorderedDrudeParticles
)
:
cu
(
cu
),
drudeParticles
(
drudeParticles
),
reorderedDrudeParticles
(
reorderedDrudeParticles
)
{
}
void
execute
()
{
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
int
numParticles
=
order
.
size
();
vector
<
int
>
inverseOrder
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
inverseOrder
[
order
[
i
]]
=
i
;
int
numDrudeParticles
=
drudeParticles
.
size
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
i
++
)
reorderedDrudeParticles
[
i
]
=
inverseOrder
[
drudeParticles
[
i
]];
}
private:
CudaContext
&
cu
;
const
vector
<
int
>&
drudeParticles
;
vector
<
int
>&
reorderedDrudeParticles
;
};
CudaIntegrateDrudeSCFStepKernel
::~
CudaIntegrateDrudeSCFStepKernel
()
{
if
(
minimizerPos
!=
NULL
)
lbfgs_free
(
minimizerPos
);
...
...
@@ -401,9 +381,7 @@ void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const Dru
double
charge
,
polarizability
,
aniso12
,
aniso34
;
force
.
getParticleParameters
(
i
,
p
,
p1
,
p2
,
p3
,
p4
,
charge
,
polarizability
,
aniso12
,
aniso34
);
drudeParticles
.
push_back
(
p
);
reorderedDrudeParticles
.
push_back
(
p
);
}
cu
.
addReorderListener
(
new
ReorderListener
(
cu
,
drudeParticles
,
reorderedDrudeParticles
));
// Initialize the energy minimizer.
...
...
@@ -469,6 +447,7 @@ void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeS
cu
.
setTime
(
cu
.
getTime
()
+
dt
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateDrudeSCFStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
DrudeSCFIntegrator
&
integrator
)
{
...
...
@@ -478,39 +457,36 @@ double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& contex
struct
MinimizerData
{
ContextImpl
&
context
;
CudaContext
&
cu
;
vector
<
int
>&
reorderedD
rudeParticles
;
MinimizerData
(
ContextImpl
&
context
,
CudaContext
&
cu
,
vector
<
int
>&
reorderedD
rudeParticles
)
:
context
(
context
),
cu
(
cu
),
reorderedD
rudeParticles
(
reorderedD
rudeParticles
)
{}
vector
<
int
>&
d
rudeParticles
;
MinimizerData
(
ContextImpl
&
context
,
CudaContext
&
cu
,
vector
<
int
>&
d
rudeParticles
)
:
context
(
context
),
cu
(
cu
),
d
rudeParticles
(
d
rudeParticles
)
{}
};
static
lbfgsfloatval_t
evaluate
(
void
*
instance
,
const
lbfgsfloatval_t
*
x
,
lbfgsfloatval_t
*
g
,
const
int
n
,
const
lbfgsfloatval_t
step
)
{
MinimizerData
*
data
=
reinterpret_cast
<
MinimizerData
*>
(
instance
);
ContextImpl
&
context
=
data
->
context
;
CudaContext
&
cu
=
data
->
cu
;
vector
<
int
>&
reorderedD
rudeParticles
=
data
->
reorderedD
rudeParticles
;
int
numDrudeParticles
=
reorderedD
rudeParticles
.
size
();
vector
<
int
>&
d
rudeParticles
=
data
->
d
rudeParticles
;
int
numDrudeParticles
=
d
rudeParticles
.
size
();
// Set the particle positions.
cu
.
getPosq
().
download
(
cu
.
getPinnedBuffer
());
double4
periodicBoxSize
=
cu
.
getPeriodicBoxSize
();
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
double4
&
p
=
posq
[
reorderedDrudeParticles
[
i
]];
int4
offset
=
cu
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
]
+
offset
.
x
*
periodicBoxSize
.
x
;
p
.
y
=
x
[
3
*
i
+
1
]
+
offset
.
y
*
periodicBoxSize
.
y
;
p
.
z
=
x
[
3
*
i
+
2
]
+
offset
.
z
*
periodicBoxSize
.
z
;
double4
&
p
=
posq
[
drudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
];
p
.
y
=
x
[
3
*
i
+
1
];
p
.
z
=
x
[
3
*
i
+
2
];
}
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
float4
&
p
=
posq
[
reorderedDrudeParticles
[
i
]];
int4
offset
=
cu
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
]
+
offset
.
x
*
periodicBoxSize
.
x
;
p
.
y
=
x
[
3
*
i
+
1
]
+
offset
.
y
*
periodicBoxSize
.
y
;
p
.
z
=
x
[
3
*
i
+
2
]
+
offset
.
z
*
periodicBoxSize
.
z
;
float4
&
p
=
posq
[
drudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
];
p
.
y
=
x
[
3
*
i
+
1
];
p
.
z
=
x
[
3
*
i
+
2
];
}
}
cu
.
getPosq
().
upload
(
cu
.
getPinnedBuffer
());
...
...
@@ -523,7 +499,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
double
forceScale
=
-
1.0
/
0x100000000
;
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
int
index
=
reorderedD
rudeParticles
[
i
];
int
index
=
d
rudeParticles
[
i
];
g
[
3
*
i
]
=
forceScale
*
force
[
index
];
g
[
3
*
i
+
1
]
=
forceScale
*
force
[
index
+
paddedNumAtoms
];
g
[
3
*
i
+
2
]
=
forceScale
*
force
[
index
+
paddedNumAtoms
*
2
];
...
...
@@ -534,27 +510,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void
CudaIntegrateDrudeSCFStepKernel
::
minimize
(
ContextImpl
&
context
,
double
tolerance
)
{
// Record the initial positions.
int
numDrudeParticles
=
reorderedD
rudeParticles
.
size
();
int
numDrudeParticles
=
d
rudeParticles
.
size
();
cu
.
getPosq
().
download
(
cu
.
getPinnedBuffer
());
double4
periodicBoxSize
=
cu
.
getPeriodicBoxSize
();
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
double4
p
=
posq
[
reorderedDrudeParticles
[
i
]];
int4
offset
=
cu
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
-
offset
.
x
*
periodicBoxSize
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
-
offset
.
y
*
periodicBoxSize
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
-
offset
.
z
*
periodicBoxSize
.
z
;
double4
p
=
posq
[
drudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
;
}
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
float4
p
=
posq
[
reorderedDrudeParticles
[
i
]];
int4
offset
=
cu
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
-
offset
.
x
*
periodicBoxSize
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
-
offset
.
y
*
periodicBoxSize
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
-
offset
.
z
*
periodicBoxSize
.
z
;
float4
p
=
posq
[
drudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
;
}
minimizerParams
.
xtol
=
1e-7
;
}
...
...
@@ -571,6 +544,6 @@ void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tole
// Perform the minimization.
lbfgsfloatval_t
fx
;
MinimizerData
data
(
context
,
cu
,
reorderedD
rudeParticles
);
MinimizerData
data
(
context
,
cu
,
d
rudeParticles
);
lbfgs
(
numDrudeParticles
*
3
,
minimizerPos
,
&
fx
,
evaluate
,
NULL
,
&
data
,
&
minimizerParams
);
}
\ No newline at end of file
plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
View file @
c589f1cc
...
...
@@ -148,12 +148,10 @@ public:
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
DrudeSCFIntegrator
&
integrator
);
private:
class
ReorderListener
;
void
minimize
(
ContextImpl
&
context
,
double
tolerance
);
CudaContext
&
cu
;
double
prevStepSize
;
std
::
vector
<
int
>
drudeParticles
;
std
::
vector
<
int
>
reorderedDrudeParticles
;
lbfgsfloatval_t
*
minimizerPos
;
lbfgs_parameter_t
minimizerParams
;
CUfunction
kernel1
,
kernel2
;
...
...
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
View file @
c589f1cc
...
...
@@ -364,33 +364,13 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
cl
.
setTime
(
cl
.
getTime
()
+
stepSize
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
}
double
OpenCLIntegrateDrudeLangevinStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
DrudeLangevinIntegrator
&
integrator
)
{
return
cl
.
getIntegrationUtilities
().
computeKineticEnergy
(
0.5
*
integrator
.
getStepSize
());
}
class
OpenCLIntegrateDrudeSCFStepKernel
::
ReorderListener
:
public
OpenCLContext
::
ReorderListener
{
public:
ReorderListener
(
OpenCLContext
&
cl
,
const
vector
<
int
>&
drudeParticles
,
vector
<
int
>&
reorderedDrudeParticles
)
:
cl
(
cl
),
drudeParticles
(
drudeParticles
),
reorderedDrudeParticles
(
reorderedDrudeParticles
)
{
}
void
execute
()
{
const
vector
<
int
>&
order
=
cl
.
getAtomIndex
();
int
numParticles
=
order
.
size
();
vector
<
int
>
inverseOrder
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
inverseOrder
[
order
[
i
]]
=
i
;
int
numDrudeParticles
=
drudeParticles
.
size
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
i
++
)
reorderedDrudeParticles
[
i
]
=
inverseOrder
[
drudeParticles
[
i
]];
}
private:
OpenCLContext
&
cl
;
const
vector
<
int
>&
drudeParticles
;
vector
<
int
>&
reorderedDrudeParticles
;
};
OpenCLIntegrateDrudeSCFStepKernel
::~
OpenCLIntegrateDrudeSCFStepKernel
()
{
if
(
minimizerPos
!=
NULL
)
lbfgs_free
(
minimizerPos
);
...
...
@@ -406,9 +386,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D
double
charge
,
polarizability
,
aniso12
,
aniso34
;
force
.
getParticleParameters
(
i
,
p
,
p1
,
p2
,
p3
,
p4
,
charge
,
polarizability
,
aniso12
,
aniso34
);
drudeParticles
.
push_back
(
p
);
reorderedDrudeParticles
.
push_back
(
p
);
}
cl
.
addReorderListener
(
new
ReorderListener
(
cl
,
drudeParticles
,
reorderedDrudeParticles
));
// Initialize the energy minimizer.
...
...
@@ -481,6 +459,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
cl
.
setTime
(
cl
.
getTime
()
+
dt
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
// Reduce UI lag.
...
...
@@ -496,39 +475,36 @@ double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont
struct
MinimizerData
{
ContextImpl
&
context
;
OpenCLContext
&
cl
;
vector
<
int
>&
reorderedD
rudeParticles
;
MinimizerData
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
vector
<
int
>&
reorderedD
rudeParticles
)
:
context
(
context
),
cl
(
cl
),
reorderedD
rudeParticles
(
reorderedD
rudeParticles
)
{}
vector
<
int
>&
d
rudeParticles
;
MinimizerData
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
vector
<
int
>&
d
rudeParticles
)
:
context
(
context
),
cl
(
cl
),
d
rudeParticles
(
d
rudeParticles
)
{}
};
static
lbfgsfloatval_t
evaluate
(
void
*
instance
,
const
lbfgsfloatval_t
*
x
,
lbfgsfloatval_t
*
g
,
const
int
n
,
const
lbfgsfloatval_t
step
)
{
MinimizerData
*
data
=
reinterpret_cast
<
MinimizerData
*>
(
instance
);
ContextImpl
&
context
=
data
->
context
;
OpenCLContext
&
cl
=
data
->
cl
;
vector
<
int
>&
reorderedD
rudeParticles
=
data
->
reorderedD
rudeParticles
;
int
numDrudeParticles
=
reorderedD
rudeParticles
.
size
();
vector
<
int
>&
d
rudeParticles
=
data
->
d
rudeParticles
;
int
numDrudeParticles
=
d
rudeParticles
.
size
();
// Set the particle positions.
cl
.
getPosq
().
download
(
cl
.
getPinnedBuffer
());
mm_double4
periodicBoxSize
=
cl
.
getPeriodicBoxSizeDouble
();
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
posq
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
mm_double4
&
p
=
posq
[
reorderedDrudeParticles
[
i
]];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
]
+
offset
.
x
*
periodicBoxSize
.
x
;
p
.
y
=
x
[
3
*
i
+
1
]
+
offset
.
y
*
periodicBoxSize
.
y
;
p
.
z
=
x
[
3
*
i
+
2
]
+
offset
.
z
*
periodicBoxSize
.
z
;
mm_double4
&
p
=
posq
[
drudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
];
p
.
y
=
x
[
3
*
i
+
1
];
p
.
z
=
x
[
3
*
i
+
2
];
}
}
else
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
mm_float4
&
p
=
posq
[
reorderedDrudeParticles
[
i
]];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
]
+
offset
.
x
*
periodicBoxSize
.
x
;
p
.
y
=
x
[
3
*
i
+
1
]
+
offset
.
y
*
periodicBoxSize
.
y
;
p
.
z
=
x
[
3
*
i
+
2
]
+
offset
.
z
*
periodicBoxSize
.
z
;
mm_float4
&
p
=
posq
[
drudeParticles
[
i
]];
p
.
x
=
x
[
3
*
i
];
p
.
y
=
x
[
3
*
i
+
1
];
p
.
z
=
x
[
3
*
i
+
2
];
}
}
cl
.
getPosq
().
upload
(
cl
.
getPinnedBuffer
());
...
...
@@ -540,7 +516,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
force
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
int
index
=
reorderedD
rudeParticles
[
i
];
int
index
=
d
rudeParticles
[
i
];
g
[
3
*
i
]
=
-
force
[
index
].
x
;
g
[
3
*
i
+
1
]
=
-
force
[
index
].
y
;
g
[
3
*
i
+
2
]
=
-
force
[
index
].
z
;
...
...
@@ -549,7 +525,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
else
{
mm_float4
*
force
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
int
index
=
reorderedD
rudeParticles
[
i
];
int
index
=
d
rudeParticles
[
i
];
g
[
3
*
i
]
=
-
force
[
index
].
x
;
g
[
3
*
i
+
1
]
=
-
force
[
index
].
y
;
g
[
3
*
i
+
2
]
=
-
force
[
index
].
z
;
...
...
@@ -561,27 +537,24 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
void
OpenCLIntegrateDrudeSCFStepKernel
::
minimize
(
ContextImpl
&
context
,
double
tolerance
)
{
// Record the initial positions.
int
numDrudeParticles
=
reorderedD
rudeParticles
.
size
();
int
numDrudeParticles
=
d
rudeParticles
.
size
();
cl
.
getPosq
().
download
(
cl
.
getPinnedBuffer
());
mm_double4
periodicBoxSize
=
cl
.
getPeriodicBoxSizeDouble
();
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
posq
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
mm_double4
p
=
posq
[
reorderedDrudeParticles
[
i
]];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
-
offset
.
x
*
periodicBoxSize
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
-
offset
.
y
*
periodicBoxSize
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
-
offset
.
z
*
periodicBoxSize
.
z
;
mm_double4
p
=
posq
[
drudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
;
}
}
else
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
numDrudeParticles
;
++
i
)
{
mm_float4
p
=
posq
[
reorderedDrudeParticles
[
i
]];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
reorderedDrudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
-
offset
.
x
*
periodicBoxSize
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
-
offset
.
y
*
periodicBoxSize
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
-
offset
.
z
*
periodicBoxSize
.
z
;
mm_float4
p
=
posq
[
drudeParticles
[
i
]];
minimizerPos
[
3
*
i
]
=
p
.
x
;
minimizerPos
[
3
*
i
+
1
]
=
p
.
y
;
minimizerPos
[
3
*
i
+
2
]
=
p
.
z
;
}
minimizerParams
.
xtol
=
1e-7
;
}
...
...
@@ -598,6 +571,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
// Perform the minimization.
lbfgsfloatval_t
fx
;
MinimizerData
data
(
context
,
cl
,
reorderedD
rudeParticles
);
MinimizerData
data
(
context
,
cl
,
d
rudeParticles
);
lbfgs
(
numDrudeParticles
*
3
,
minimizerPos
,
&
fx
,
evaluate
,
NULL
,
&
data
,
&
minimizerParams
);
}
\ No newline at end of file
plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
View file @
c589f1cc
...
...
@@ -149,13 +149,11 @@ public:
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
DrudeSCFIntegrator
&
integrator
);
private:
class
ReorderListener
;
void
minimize
(
ContextImpl
&
context
,
double
tolerance
);
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
double
prevStepSize
;
std
::
vector
<
int
>
drudeParticles
;
std
::
vector
<
int
>
reorderedDrudeParticles
;
lbfgsfloatval_t
*
minimizerPos
;
lbfgs_parameter_t
minimizerParams
;
cl
::
Kernel
kernel1
,
kernel2
;
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
c589f1cc
...
...
@@ -132,6 +132,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
void
CudaIntegrateRPMDStepKernel
::
execute
(
ContextImpl
&
context
,
const
RPMDIntegrator
&
integrator
,
bool
forcesAreValid
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
// Loop over copies and compute the force on each one.
...
...
@@ -178,6 +179,15 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
cu
.
setTime
(
cu
.
getTime
()
+
dt
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
if
(
cu
.
getAtomsWereReordered
()
&&
cu
.
getNonbondedUtilities
().
getUsePeriodic
())
{
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
int
i
=
numCopies
-
1
;
void
*
args
[]
=
{
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
translateKernel
,
args
,
cu
.
getNumAtoms
());
}
}
void
CudaIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
...
...
@@ -188,13 +198,6 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context
.
computeVirtualSites
();
context
.
updateContextState
();
context
.
calcForcesAndEnergy
(
true
,
false
);
if
(
cu
.
getAtomsWereReordered
()
&&
cu
.
getNonbondedUtilities
().
getUsePeriodic
())
{
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
void
*
args
[]
=
{
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
translateKernel
,
args
,
cu
.
getNumAtoms
());
}
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
c589f1cc
...
...
@@ -190,6 +190,14 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
cl
.
setTime
(
cl
.
getTime
()
+
dt
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
if
(
cl
.
getAtomsWereReordered
()
&&
cl
.
getNonbondedUtilities
().
getUsePeriodic
())
{
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel
.
setArg
<
cl_int
>
(
3
,
numCopies
-
1
);
cl
.
executeKernel
(
translateKernel
,
cl
.
getNumAtoms
());
}
}
void
OpenCLIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
...
...
@@ -199,13 +207,6 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
context
.
computeVirtualSites
();
context
.
updateContextState
();
context
.
calcForcesAndEnergy
(
true
,
false
);
if
(
cl
.
getAtomsWereReordered
()
&&
cl
.
getNonbondedUtilities
().
getUsePeriodic
())
{
// Atoms may have been translated into a different periodic box, so apply
// the same translation to all the beads.
translateKernel
.
setArg
<
cl_int
>
(
3
,
i
);
cl
.
executeKernel
(
translateKernel
,
cl
.
getNumAtoms
());
}
copyFromContextKernel
.
setArg
<
cl_int
>
(
7
,
i
);
cl
.
executeKernel
(
copyFromContextKernel
,
cl
.
getNumAtoms
());
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment