Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
839ab51e
Commit
839ab51e
authored
May 27, 2011
by
Peter Eastman
Browse files
Optimization to improve cache coherency in charge spreading kernel
parent
92766e8a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
4 deletions
+6
-4
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+3
-2
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+3
-2
No files found.
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
839ab51e
...
@@ -171,6 +171,7 @@ void kUpdateBsplines_kernel()
...
@@ -171,6 +171,7 @@ void kUpdateBsplines_kernel()
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
{
data
[
j
].
w
=
posq
.
w
;
// Storing the charge here improves cache coherency in the charge spreading kernel
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
}
}
...
@@ -256,7 +257,7 @@ void kGridSpreadCharge_kernel()
...
@@ -256,7 +257,7 @@ void kGridSpreadCharge_kernel()
int
atomIndex
=
atomData
.
x
;
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
cSim
.
pmeGridSize
.
z
);
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
cSim
.
pmeGridSize
.
z
);
float
atomCharge
=
cSim
.
pPosq
[
atomIndex
]
.
w
;
float
atomCharge
=
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
)
.
w
;
result
+=
atomCharge
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
).
x
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iy
*
cSim
.
atoms
).
y
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iz
*
cSim
.
atoms
).
z
;
result
+=
atomCharge
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
).
x
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iy
*
cSim
.
atoms
).
y
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iz
*
cSim
.
atoms
).
z
;
}
}
if
(
z1
>
gridPoint
.
z
)
if
(
z1
>
gridPoint
.
z
)
...
@@ -271,7 +272,7 @@ void kGridSpreadCharge_kernel()
...
@@ -271,7 +272,7 @@ void kGridSpreadCharge_kernel()
int
atomIndex
=
atomData
.
x
;
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
cSim
.
pmeGridSize
.
z
);
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
cSim
.
pmeGridSize
.
z
);
float
atomCharge
=
cSim
.
pPosq
[
atomIndex
]
.
w
;
float
atomCharge
=
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
)
.
w
;
result
+=
atomCharge
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
).
x
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iy
*
cSim
.
atoms
).
y
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iz
*
cSim
.
atoms
).
z
;
result
+=
atomCharge
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
ix
*
cSim
.
atoms
).
x
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iy
*
cSim
.
atoms
).
y
*
tex1Dfetch
(
bsplineThetaRef
,
atomIndex
+
iz
*
cSim
.
atoms
).
z
;
}
}
}
}
...
...
platforms/opencl/src/kernels/pme.cl
View file @
839ab51e
...
@@ -37,6 +37,7 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
...
@@ -37,6 +37,7 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
float4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
float4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
float4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
float4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j].w
=
pos.w
; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
pmeBsplineDTheta[i+j*NUM_ATOMS]
=
ddata[j]
;
pmeBsplineDTheta[i+j*NUM_ATOMS]
=
ddata[j]
;
}
}
...
@@ -108,7 +109,7 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
...
@@ -108,7 +109,7 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
int
atomIndex
=
atomData.x
;
int
atomIndex
=
atomData.x
;
int
z
=
atomData.y
;
int
z
=
atomData.y
;
int
iz
=
gridPoint.z-z+
(
gridPoint.z
>=
z
?
0
:
GRID_SIZE_Z
)
;
int
iz
=
gridPoint.z-z+
(
gridPoint.z
>=
z
?
0
:
GRID_SIZE_Z
)
;
float
atomCharge
=
p
osq[atomIndex
].w
;
float
atomCharge
=
p
meBsplineTheta[atomIndex+ix*NUM_ATOMS
].w
;
result
+=
atomCharge*pmeBsplineTheta[atomIndex+ix*NUM_ATOMS].x*pmeBsplineTheta[atomIndex+iy*NUM_ATOMS].y*pmeBsplineTheta[atomIndex+iz*NUM_ATOMS].z
;
result
+=
atomCharge*pmeBsplineTheta[atomIndex+ix*NUM_ATOMS].x*pmeBsplineTheta[atomIndex+iy*NUM_ATOMS].y*pmeBsplineTheta[atomIndex+iz*NUM_ATOMS].z
;
}
}
if
(
z1
>
gridPoint.z
)
if
(
z1
>
gridPoint.z
)
...
@@ -123,7 +124,7 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
...
@@ -123,7 +124,7 @@ __kernel void gridSpreadCharge(__global float4* posq, __global int2* pmeAtomGrid
int
atomIndex
=
atomData.x
;
int
atomIndex
=
atomData.x
;
int
z
=
atomData.y
;
int
z
=
atomData.y
;
int
iz
=
gridPoint.z-z+
(
gridPoint.z
>=
z
?
0
:
GRID_SIZE_Z
)
;
int
iz
=
gridPoint.z-z+
(
gridPoint.z
>=
z
?
0
:
GRID_SIZE_Z
)
;
float
atomCharge
=
p
osq[atomIndex
].w
;
float
atomCharge
=
p
meBsplineTheta[atomIndex+ix*NUM_ATOMS
].w
;
result
+=
atomCharge*pmeBsplineTheta[atomIndex+ix*NUM_ATOMS].x*pmeBsplineTheta[atomIndex+iy*NUM_ATOMS].y*pmeBsplineTheta[atomIndex+iz*NUM_ATOMS].z
;
result
+=
atomCharge*pmeBsplineTheta[atomIndex+ix*NUM_ATOMS].x*pmeBsplineTheta[atomIndex+iy*NUM_ATOMS].y*pmeBsplineTheta[atomIndex+iz*NUM_ATOMS].z
;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment