Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
859bfd6c
Commit
859bfd6c
authored
Feb 15, 2017
by
peastman
Browse files
Use lambdas for thread pool tasks
parent
2c559596
Changes
32
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
137 additions
and
296 deletions
+137
-296
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+39
-59
platforms/cpu/src/CpuLangevinDynamics.cpp
platforms/cpu/src/CpuLangevinDynamics.cpp
+4
-37
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+2
-13
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+2
-13
platforms/cpu/src/CpuSETTLE.cpp
platforms/cpu/src/CpuSETTLE.cpp
+21
-51
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+0
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+23
-34
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+0
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+22
-33
platforms/reference/src/SimTKReference/ReferenceCCMAAlgorithm.cpp
...s/reference/src/SimTKReference/ReferenceCCMAAlgorithm.cpp
+21
-39
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+2
-13
plugins/cpupme/src/CpuPmeKernels.h
plugins/cpupme/src/CpuPmeKernels.h
+1
-2
No files found.
platforms/cpu/src/CpuKernels.cpp
View file @
859bfd6c
...
...
@@ -137,35 +137,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return
0.5
*
energy
;
}
class
CpuCalcForcesAndEnergyKernel
::
SumForceTask
:
public
ThreadPool
::
Task
{
public:
SumForceTask
(
int
numParticles
,
vector
<
RealVec
>&
forceData
,
CpuPlatform
::
PlatformData
&
data
)
:
numParticles
(
numParticles
),
forceData
(
forceData
),
data
(
data
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Sum the contributions to forces that have been calculated by different threads.
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
// Create a Reference platform version of this kernel.
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
}
int
numParticles
;
vector
<
RealVec
>&
forceData
;
CpuPlatform
::
PlatformData
&
data
;
};
ReferenceKernelFactory
referenceFactory
;
referenceKernel
=
Kernel
(
referenceFactory
.
createKernelImpl
(
name
,
platform
,
context
));
}
class
CpuCalcForcesAndEnergyKernel
::
InitForceTask
:
public
ThreadPool
::
Task
{
public:
InitForceTask
(
int
numParticles
,
ContextImpl
&
context
,
CpuPlatform
::
PlatformData
&
data
)
:
numParticles
(
numParticles
),
positionsValid
(
true
),
context
(
context
),
data
(
data
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
void
CpuCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
initialize
(
system
);
lastPositions
.
resize
(
system
.
getNumParticles
(),
Vec3
(
1e10
,
1e10
,
1e10
));
}
void
CpuCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
// Convert positions to single precision and clear the forces.
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
bool
positionsValid
=
true
;
data
.
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Convert the positions to single precision and apply periodic boundary conditions
AlignedArray
<
float
>&
posq
=
data
.
posq
;
...
...
@@ -218,36 +210,9 @@ public:
fvec4
zero
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
zero
.
store
(
&
data
.
threadForce
[
threadIndex
][
j
*
4
]);
}
int
numParticles
;
bool
positionsValid
;
ContextImpl
&
context
;
CpuPlatform
::
PlatformData
&
data
;
};
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
// Create a Reference platform version of this kernel.
ReferenceKernelFactory
referenceFactory
;
referenceKernel
=
Kernel
(
referenceFactory
.
createKernelImpl
(
name
,
platform
,
context
));
}
void
CpuCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
initialize
(
system
);
lastPositions
.
resize
(
system
.
getNumParticles
(),
Vec3
(
1e10
,
1e10
,
1e10
));
}
void
CpuCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
// Convert positions to single precision and clear the forces.
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
InitForceTask
task
(
numParticles
,
context
,
data
);
data
.
threads
.
execute
(
task
);
});
data
.
threads
.
waitForThreads
();
if
(
!
task
.
positionsValid
)
if
(
!
positionsValid
)
throw
OpenMMException
(
"Particle coordinate is nan"
);
// Determine whether we need to recompute the neighbor list.
...
...
@@ -302,8 +267,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
// Sum the forces from all the threads.
SumForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
extractForces
(
context
),
data
);
data
.
threads
.
execute
(
task
);
data
.
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Sum the contributions to forces that have been calculated by different threads.
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
});
data
.
threads
.
waitForThreads
();
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
,
valid
);
}
...
...
platforms/cpu/src/CpuLangevinDynamics.cpp
View file @
859bfd6c
/* Portions copyright (c) 2006-201
6
Stanford University and Simbios.
/* Portions copyright (c) 2006-201
7
Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
...
...
@@ -29,36 +29,6 @@
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuLangevinDynamics
::
Update1Task
:
public
ThreadPool
::
Task
{
public:
Update1Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate1
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
class
CpuLangevinDynamics
::
Update2Task
:
public
ThreadPool
::
Task
{
public:
Update2Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate2
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
class
CpuLangevinDynamics
::
Update3Task
:
public
ThreadPool
::
Task
{
public:
Update3Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate3
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
CpuLangevinDynamics
::
CpuLangevinDynamics
(
int
numberOfAtoms
,
RealOpenMM
deltaT
,
RealOpenMM
friction
,
RealOpenMM
temperature
,
ThreadPool
&
threads
,
CpuRandom
&
random
)
:
ReferenceStochasticDynamics
(
numberOfAtoms
,
deltaT
,
friction
,
temperature
),
threads
(
threads
),
random
(
random
)
{
}
...
...
@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update1Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadUpdate1
(
threadIndex
);
});
threads
.
waitForThreads
();
}
...
...
@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update2Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadUpdate2
(
threadIndex
);
});
threads
.
waitForThreads
();
}
...
...
@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update3Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadUpdate3
(
threadIndex
);
});
threads
.
waitForThreads
();
}
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
859bfd6c
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -409,16 +409,6 @@ private:
vector
<
vector
<
vector
<
pair
<
float
,
int
>
>
>
>
bins
;
};
class
CpuNeighborList
::
ThreadTask
:
public
ThreadPool
::
Task
{
public:
ThreadTask
(
CpuNeighborList
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeNeighborList
(
threads
,
threadIndex
);
}
CpuNeighborList
&
owner
;
};
CpuNeighborList
::
CpuNeighborList
(
int
blockSize
)
:
blockSize
(
blockSize
)
{
}
...
...
@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve.
atomBins
.
resize
(
numAtoms
);
ThreadTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadComputeNeighborList
(
threads
,
threadIndex
);
});
threads
.
waitForThreads
();
sort
(
atomBins
.
begin
(),
atomBins
.
end
());
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
859bfd6c
/* Portions copyright (c) 2006-201
5
Stanford University and Simbios.
/* Portions copyright (c) 2006-201
7
Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
...
...
@@ -41,16 +41,6 @@ using namespace OpenMM;
const
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
2048
;
class
CpuNonbondedForce
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
ComputeDirectTask
(
CpuNonbondedForce
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeDirect
(
threads
,
threadIndex
);
}
CpuNonbondedForce
&
owner
;
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
...
...
@@ -318,8 +308,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadComputeDirect
(
threads
,
threadIndex
);
});
threads
.
waitForThreads
();
// Signal the threads to subtract the exclusions.
...
...
platforms/cpu/src/CpuSETTLE.cpp
View file @
859bfd6c
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -35,52 +35,6 @@
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuSETTLE
::
ApplyToPositionsTask
:
public
ThreadPool
::
Task
{
public:
ApplyToPositionsTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
atomCoordinatesP
(
atomCoordinatesP
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
gmx_atomic_set
(
&
atomicCounter
,
0
);
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
gmx_atomic_t
atomicCounter
;
};
class
CpuSETTLE
::
ApplyToVelocitiesTask
:
public
ThreadPool
::
Task
{
public:
ApplyToVelocitiesTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
velocities
(
velocities
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
gmx_atomic_set
(
&
atomicCounter
,
0
);
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
velocities
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
gmx_atomic_t
atomicCounter
;
};
CpuSETTLE
::
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
)
:
threads
(
threads
)
{
int
numBlocks
=
10
*
threads
.
getNumThreads
();
int
numClusters
=
settle
.
getNumClusters
();
...
...
@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
}
void
CpuSETTLE
::
apply
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToPositionsTask
task
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
gmx_atomic_t
atomicCounter
;
gmx_atomic_set
(
&
atomicCounter
,
0
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
});
threads
.
waitForThreads
();
}
void
CpuSETTLE
::
applyToVelocities
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToVelocitiesTask
task
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
gmx_atomic_t
atomicCounter
;
gmx_atomic_set
(
&
atomicCounter
,
0
);
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
while
(
true
)
{
int
index
=
gmx_atomic_fetch_add
(
&
atomicCounter
,
1
);
if
(
index
>=
threadSettle
.
size
())
break
;
threadSettle
[
index
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
});
threads
.
waitForThreads
();
}
platforms/cuda/include/CudaKernels.h
View file @
859bfd6c
...
...
@@ -198,7 +198,6 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
CudaContext
&
cu
;
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
859bfd6c
...
...
@@ -147,11 +147,29 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts
[
i
]
->
setTime
(
time
);
}
class
CudaUpdateStateDataKernel
::
GetPositionsTask
:
public
ThreadPool
::
Task
{
public:
GetPositionsTask
(
CudaContext
&
cu
,
vector
<
Vec3
>&
positions
,
vector
<
float4
>&
posCorrection
)
:
cu
(
cu
),
positions
(
positions
),
posCorrection
(
posCorrection
)
{
void
CudaUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
cu
.
setAsCurrent
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
vector
<
float4
>
posCorrection
;
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
}
else
if
(
cu
.
getUseMixedPrecision
())
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
,
false
);
posCorrection
.
resize
(
numParticles
);
cu
.
getPosqCorrection
().
download
(
posCorrection
);
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Filling in the output array is done in parallel for speed.
cu
.
getPlatformData
().
threads
.
execute
([
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
...
...
@@ -186,36 +204,7 @@ public:
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
}
CudaContext
&
cu
;
vector
<
Vec3
>&
positions
;
vector
<
float4
>&
posCorrection
;
};
void
CudaUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
cu
.
setAsCurrent
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
vector
<
float4
>
posCorrection
;
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
}
else
if
(
cu
.
getUseMixedPrecision
())
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
,
false
);
posCorrection
.
resize
(
numParticles
);
cu
.
getPosqCorrection
().
download
(
posCorrection
);
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask
task
(
cu
,
positions
,
posCorrection
);
cu
.
getPlatformData
().
threads
.
execute
(
task
);
});
cu
.
getPlatformData
().
threads
.
waitForThreads
();
}
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
859bfd6c
...
...
@@ -176,7 +176,6 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
OpenCLContext
&
cl
;
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
859bfd6c
...
...
@@ -171,11 +171,28 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time);
}
class OpenCLUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
GetPositionsTask(OpenCLContext& cl, vector<Vec3>& positions, vector<mm_float4>& posCorrection) : cl(cl), positions(positions), posCorrection(posCorrection) {
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<mm_float4> posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
void execute(ThreadPool& threads, int threadIndex) {
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
cl.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cl.getAtomIndex();
...
...
@@ -210,35 +227,7 @@ public:
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
OpenCLContext& cl;
vector<Vec3>& positions;
vector<mm_float4>& posCorrection;
};
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<mm_float4> posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cl, positions, posCorrection);
cl.getPlatformData().threads.execute(task);
});
cl.getPlatformData().threads.waitForThreads();
}
...
...
platforms/reference/src/SimTKReference/ReferenceCCMAAlgorithm.cpp
View file @
859bfd6c
/* Portions copyright (c) 2006-201
5
Stanford University and Simbios.
/* Portions copyright (c) 2006-201
7
Stanford University and Simbios.
* Contributors: Peter Eastman, Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
...
...
@@ -38,42 +38,6 @@
using
namespace
OpenMM
;
using
namespace
std
;
// This class extracts columns from the inverse matrix one at a time. It is done in parallel,
// since this can be very slow.
class
ExtractMatrixTask
:
public
ThreadPool
::
Task
{
public:
ExtractMatrixTask
(
int
numConstraints
,
vector
<
vector
<
pair
<
int
,
RealOpenMM
>
>
>&
transposedMatrix
,
const
vector
<
RealOpenMM
>&
distance
,
RealOpenMM
elementCutoff
,
const
int
*
qRowStart
,
const
int
*
qColIndex
,
const
int
*
rRowStart
,
const
int
*
rColIndex
,
const
double
*
qValue
,
const
double
*
rValue
)
:
numConstraints
(
numConstraints
),
transposedMatrix
(
transposedMatrix
),
distance
(
distance
),
elementCutoff
(
elementCutoff
),
qRowStart
(
qRowStart
),
qColIndex
(
qColIndex
),
rRowStart
(
rRowStart
),
rColIndex
(
rColIndex
),
qValue
(
qValue
),
rValue
(
rValue
)
{
}
void
execute
(
ThreadPool
&
pool
,
int
threadIndex
)
{
vector
<
double
>
rhs
(
numConstraints
);
for
(
int
i
=
threadIndex
;
i
<
numConstraints
;
i
+=
pool
.
getNumThreads
())
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numConstraints
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
QUERN_multiply_with_q_transpose
(
numConstraints
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
QUERN_solve_with_r
(
numConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numConstraints
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
i
]
/
distance
[
j
];
if
(
FABS
((
RealOpenMM
)
value
)
>
elementCutoff
)
transposedMatrix
[
i
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
j
,
(
RealOpenMM
)
value
));
}
}
}
private:
int
numConstraints
;
vector
<
vector
<
pair
<
int
,
RealOpenMM
>
>
>&
transposedMatrix
;
const
vector
<
RealOpenMM
>&
distance
;
RealOpenMM
elementCutoff
;
const
int
*
qRowStart
,
*
qColIndex
,
*
rRowStart
,
*
rColIndex
;
const
double
*
qValue
,
*
rValue
;
};
ReferenceCCMAAlgorithm
::
ReferenceCCMAAlgorithm
(
int
numberOfAtoms
,
int
numberOfConstraints
,
const
vector
<
pair
<
int
,
int
>
>&
atomIndices
,
...
...
@@ -194,9 +158,27 @@ ReferenceCCMAAlgorithm::ReferenceCCMAAlgorithm(int numberOfAtoms,
&
qRowStart
,
&
qColIndex
,
&
qValue
,
&
rRowStart
,
&
rColIndex
,
&
rValue
);
vector
<
vector
<
pair
<
int
,
RealOpenMM
>
>
>
transposedMatrix
(
numberOfConstraints
);
_matrix
.
resize
(
numberOfConstraints
);
// Extract columns from the inverse matrix one at a time. It is done in parallel,
// since this can be very slow.
ThreadPool
threads
;
ExtractMatrixTask
task
(
numberOfConstraints
,
transposedMatrix
,
_distance
,
_elementCutoff
,
qRowStart
,
qColIndex
,
rRowStart
,
rColIndex
,
qValue
,
rValue
);
threads
.
execute
(
task
);
threads
.
execute
([
&
]
(
ThreadPool
&
pool
,
int
threadIndex
)
{
vector
<
double
>
rhs
(
numberOfConstraints
);
for
(
int
i
=
threadIndex
;
i
<
numberOfConstraints
;
i
+=
pool
.
getNumThreads
())
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
QUERN_multiply_with_q_transpose
(
numberOfConstraints
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
QUERN_solve_with_r
(
numberOfConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
i
]
/
distance
[
j
];
if
(
FABS
((
RealOpenMM
)
value
)
>
elementCutoff
)
transposedMatrix
[
i
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
j
,
(
RealOpenMM
)
value
));
}
}
});
threads
.
waitForThreads
();
// For purposes of thread safety we extracted the matrix in transposed form, so we need to transpose it again.
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
859bfd6c
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -334,16 +334,6 @@ static void interpolateForces(float* posq, float* force, float* grid, int gridx,
}
}
class
CpuCalcPmeReciprocalForceKernel
::
ComputeTask
:
public
ThreadPool
::
Task
{
public:
ComputeTask
(
CpuCalcPmeReciprocalForceKernel
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
runWorkerThread
(
threads
,
threadIndex
);
}
CpuCalcPmeReciprocalForceKernel
&
owner
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuCalcPmeReciprocalForceKernel
&
owner
=
*
reinterpret_cast
<
CpuCalcPmeReciprocalForceKernel
*>
(
args
);
owner
.
runMainThread
();
...
...
@@ -483,9 +473,8 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
if
(
isDeleted
)
break
;
posq
=
io
->
getPosq
();
ComputeTask
task
(
*
this
);
gmx_atomic_set
(
&
atomicCounter
,
0
);
threads
.
execute
(
task
);
// Signal threads to perform charge spreading.
threads
.
execute
(
[
&
]
(
ThreadPool
&
threads
,
int
threadIndex
)
{
runWorkerThread
(
threads
,
threadIndex
);
}
);
// Signal threads to perform charge spreading.
threads
.
waitForThreads
();
threads
.
resumeThreads
();
// Signal threads to sum the charge grids.
threads
.
waitForThreads
();
...
...
plugins/cpupme/src/CpuPmeKernels.h
View file @
859bfd6c
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2013-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -102,7 +102,6 @@ public:
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
class
ComputeTask
;
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment