Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
684ba725
Commit
684ba725
authored
Jun 19, 2013
by
peastman
Browse files
Parallelized charge spreading for CPU based PME.
parent
58e0996f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
36 additions
and
15 deletions
+36
-15
platforms/cuda/src/CpuPme.cpp
platforms/cuda/src/CpuPme.cpp
+36
-15
No files found.
platforms/cuda/src/CpuPme.cpp
View file @
684ba725
...
@@ -90,7 +90,7 @@ static int getNumProcessors() {
...
@@ -90,7 +90,7 @@ static int getNumProcessors() {
#endif
#endif
}
}
static
void
spreadCharge
(
float
*
posq
,
float
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
Vec3
periodicBoxSize
)
{
static
void
spreadCharge
(
int
start
,
int
end
,
float
*
posq
,
float
*
grid
,
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
Vec3
periodicBoxSize
)
{
float
temp
[
4
];
float
temp
[
4
];
__m128
boxSize
=
_mm_set_ps
(
0
,
(
float
)
periodicBoxSize
[
2
],
(
float
)
periodicBoxSize
[
1
],
(
float
)
periodicBoxSize
[
0
]);
__m128
boxSize
=
_mm_set_ps
(
0
,
(
float
)
periodicBoxSize
[
2
],
(
float
)
periodicBoxSize
[
1
],
(
float
)
periodicBoxSize
[
0
]);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
float
)
(
1
/
periodicBoxSize
[
2
]),
(
float
)
(
1
/
periodicBoxSize
[
1
]),
(
float
)
(
1
/
periodicBoxSize
[
0
]));
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
float
)
(
1
/
periodicBoxSize
[
2
]),
(
float
)
(
1
/
periodicBoxSize
[
1
]),
(
float
)
(
1
/
periodicBoxSize
[
0
]));
...
@@ -100,7 +100,7 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
...
@@ -100,7 +100,7 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
__m128
scale
=
_mm_set1_ps
(
1.0
f
/
(
PME_ORDER
-
1
));
__m128
scale
=
_mm_set1_ps
(
1.0
f
/
(
PME_ORDER
-
1
));
const
float
epsilonFactor
=
sqrt
(
ONE_4PI_EPS0
);
const
float
epsilonFactor
=
sqrt
(
ONE_4PI_EPS0
);
memset
(
grid
,
0
,
sizeof
(
float
)
*
gridx
*
gridy
*
gridz
);
memset
(
grid
,
0
,
sizeof
(
float
)
*
gridx
*
gridy
*
gridz
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
// Find the position relative to the nearest grid point.
// Find the position relative to the nearest grid point.
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
__m128
pos
=
_mm_load_ps
(
&
posq
[
4
*
i
]);
...
@@ -303,6 +303,9 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
...
@@ -303,6 +303,9 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
int
gridIndexX
=
_mm_extract_epi32
(
gridIndex
,
0
);
int
gridIndexX
=
_mm_extract_epi32
(
gridIndex
,
0
);
int
gridIndexY
=
_mm_extract_epi32
(
gridIndex
,
1
);
int
gridIndexY
=
_mm_extract_epi32
(
gridIndex
,
1
);
int
gridIndexZ
=
_mm_extract_epi32
(
gridIndex
,
2
);
int
gridIndexZ
=
_mm_extract_epi32
(
gridIndex
,
2
);
__m128
zdata
[
PME_ORDER
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
zdata
[
j
]
=
_mm_set_ps
(
0
,
extractFloat
(
ddata
[
j
],
2
),
extractFloat
(
data
[
j
],
2
),
extractFloat
(
data
[
j
],
2
));
__m128
f
=
_mm_set1_ps
(
0
);
__m128
f
=
_mm_set1_ps
(
0
);
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndexX
+
ix
;
int
xbase
=
gridIndexX
+
ix
;
...
@@ -324,10 +327,7 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
...
@@ -324,10 +327,7 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
int
zindex
=
gridIndexZ
+
iz
;
int
zindex
=
gridIndexZ
+
iz
;
zindex
-=
(
zindex
>=
gridz
?
gridz
:
0
);
zindex
-=
(
zindex
>=
gridz
?
gridz
:
0
);
__m128
gridValue
=
_mm_set1_ps
(
grid
[
ybase
+
zindex
]);
__m128
gridValue
=
_mm_set1_ps
(
grid
[
ybase
+
zindex
]);
float
dz
=
extractFloat
(
data
[
iz
],
2
);
f
=
_mm_add_ps
(
f
,
_mm_mul_ps
(
xydata
,
_mm_mul_ps
(
zdata
[
iz
],
gridValue
)));
float
ddz
=
extractFloat
(
ddata
[
iz
],
2
);
__m128
zdata
=
_mm_set_ps
(
0
,
ddz
,
dz
,
dz
);
f
=
_mm_add_ps
(
f
,
_mm_mul_ps
(
xydata
,
_mm_mul_ps
(
zdata
,
gridValue
)));
}
}
}
}
}
}
...
@@ -340,7 +340,8 @@ class CpuPme::ThreadData {
...
@@ -340,7 +340,8 @@ class CpuPme::ThreadData {
public:
public:
CpuPme
&
owner
;
CpuPme
&
owner
;
int
index
;
int
index
;
ThreadData
(
CpuPme
&
owner
,
int
index
)
:
owner
(
owner
),
index
(
index
)
{
float
*
tempGrid
;
ThreadData
(
CpuPme
&
owner
,
int
index
)
:
owner
(
owner
),
index
(
index
),
tempGrid
(
NULL
)
{
}
}
};
};
...
@@ -368,11 +369,12 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
...
@@ -368,11 +369,12 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
ThreadData
*
data
=
new
ThreadData
(
*
this
,
i
);
ThreadData
*
data
=
new
ThreadData
(
*
this
,
i
);
threadData
.
push_back
(
data
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
data
->
tempGrid
=
(
float
*
)
fftwf_malloc
(
sizeof
(
float
)
*
(
gridx
*
gridy
*
gridz
+
3
));
}
}
// Initialize FFTW.
// Initialize FFTW.
realGrid
=
(
float
*
)
fftwf_malloc
(
sizeof
(
float
)
*
gridx
*
gridy
*
gridz
)
;
realGrid
=
threadData
[
0
]
->
tempGrid
;
complexGrid
=
(
fftwf_complex
*
)
fftwf_malloc
(
sizeof
(
fftwf_complex
)
*
gridx
*
gridy
*
(
gridz
/
2
+
1
));
complexGrid
=
(
fftwf_complex
*
)
fftwf_malloc
(
sizeof
(
fftwf_complex
)
*
gridx
*
gridy
*
(
gridz
/
2
+
1
));
fftwf_plan_with_nthreads
(
numThreads
);
fftwf_plan_with_nthreads
(
numThreads
);
forwardFFT
=
fftwf_plan_dft_r2c_3d
(
gridx
,
gridy
,
gridz
,
realGrid
,
complexGrid
,
FFTW_MEASURE
);
forwardFFT
=
fftwf_plan_dft_r2c_3d
(
gridx
,
gridy
,
gridz
,
realGrid
,
complexGrid
,
FFTW_MEASURE
);
...
@@ -445,11 +447,16 @@ CpuPme::~CpuPme() {
...
@@ -445,11 +447,16 @@ CpuPme::~CpuPme() {
isFinished
=
true
;
isFinished
=
true
;
pthread_cond_broadcast
(
&
startCondition
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
thread
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
pthread_cond_destroy
(
&
endCondition
);
for
(
int
i
=
0
;
i
<
(
int
)
threadData
.
size
();
i
++
)
{
if
(
threadData
[
i
]
->
tempGrid
!=
NULL
)
fftwf_free
(
threadData
[
i
]
->
tempGrid
);
delete
threadData
[
i
];
}
}
}
#include <sys/time.h>
#include <sys/time.h>
...
@@ -460,20 +467,33 @@ double diff(struct timeval t1, struct timeval t2) {
...
@@ -460,20 +467,33 @@ double diff(struct timeval t1, struct timeval t2) {
void
CpuPme
::
runThread
(
int
index
)
{
void
CpuPme
::
runThread
(
int
index
)
{
int
particleStart
=
(
index
*
numParticles
)
/
numThreads
;
int
particleStart
=
(
index
*
numParticles
)
/
numThreads
;
int
particleEnd
=
((
index
+
1
)
*
numParticles
)
/
numThreads
;
int
particleEnd
=
((
index
+
1
)
*
numParticles
)
/
numThreads
;
int
gridStart
=
(
index
*
gridx
)
/
numThreads
;
int
gridxStart
=
(
index
*
gridx
)
/
numThreads
;
int
gridEnd
=
((
index
+
1
)
*
gridx
)
/
numThreads
;
int
gridxEnd
=
((
index
+
1
)
*
gridx
)
/
numThreads
;
int
gridSize
=
(
gridx
*
gridy
*
gridz
+
3
)
/
4
;
int
gridStart
=
4
*
((
index
*
gridSize
)
/
numThreads
);
int
gridEnd
=
4
*
(((
index
+
1
)
*
gridSize
)
/
numThreads
);
while
(
!
isFinished
)
{
while
(
!
isFinished
)
{
threadWait
();
spreadCharge
(
particleStart
,
particleEnd
,
posq
,
threadData
[
index
]
->
tempGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
threadWait
();
int
numGrids
=
threadData
.
size
();
for
(
int
i
=
gridStart
;
i
<
gridEnd
;
i
+=
4
)
{
__m128
sum
=
_mm_load_ps
(
&
realGrid
[
i
]);
for
(
int
j
=
1
;
j
<
numGrids
;
j
++
)
sum
=
_mm_add_ps
(
sum
,
_mm_load_ps
(
&
threadData
[
j
]
->
tempGrid
[
i
]));
_mm_store_ps
(
&
realGrid
[
i
],
sum
);
}
threadWait
();
threadWait
();
if
(
isFinished
)
if
(
isFinished
)
break
;
break
;
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
double
threadEnergy
=
reciprocalEnergy
(
gridStart
,
gridEnd
,
&
complexGrid
[
0
]
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
double
threadEnergy
=
reciprocalEnergy
(
grid
x
Start
,
grid
x
End
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
pthread_mutex_lock
(
&
lock
);
pthread_mutex_lock
(
&
lock
);
energy
+=
threadEnergy
;
energy
+=
threadEnergy
;
pthread_mutex_unlock
(
&
lock
);
pthread_mutex_unlock
(
&
lock
);
threadWait
();
threadWait
();
}
}
reciprocalConvolution
(
gridStart
,
gridEnd
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
reciprocalConvolution
(
grid
x
Start
,
grid
x
End
,
complexGrid
,
gridx
,
gridy
,
gridz
,
alpha
,
bsplineModuli
,
periodicBoxSize
);
threadWait
();
threadWait
();
interpolateForces
(
particleStart
,
particleEnd
,
posq
,
force
,
realGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
interpolateForces
(
particleStart
,
particleEnd
,
posq
,
force
,
realGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
}
}
...
@@ -505,7 +525,8 @@ double CpuPme::computeForceAndEnergy(float* posq, float* force, Vec3 periodicBox
...
@@ -505,7 +525,8 @@ double CpuPme::computeForceAndEnergy(float* posq, float* force, Vec3 periodicBox
energy
=
0.0
;
energy
=
0.0
;
struct
timeval
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
;
struct
timeval
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
;
gettimeofday
(
&
t1
,
NULL
);
gettimeofday
(
&
t1
,
NULL
);
spreadCharge
(
posq
,
realGrid
,
gridx
,
gridy
,
gridz
,
numParticles
,
periodicBoxSize
);
advanceThreads
();
// Signal threads to perform charge spreading.
advanceThreads
();
// Signal threads to sum the charge grids.
gettimeofday
(
&
t2
,
NULL
);
gettimeofday
(
&
t2
,
NULL
);
fftwf_execute_dft_r2c
(
forwardFFT
,
realGrid
,
complexGrid
);
fftwf_execute_dft_r2c
(
forwardFFT
,
realGrid
,
complexGrid
);
gettimeofday
(
&
t3
,
NULL
);
gettimeofday
(
&
t3
,
NULL
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment