Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
99ed3ba7
Commit
99ed3ba7
authored
Apr 03, 2015
by
Peter Eastman
Browse files
Optimization for Nvidia
parent
4959bb9c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
1 deletion
+14
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+4
-1
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+10
-0
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
99ed3ba7
...
@@ -1666,7 +1666,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1666,7 +1666,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
bool
isNvidia
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
isNvidia
)
pmeDefines
[
"USE_ALTERNATE_MEMORY_ACCESS_PATTERN"
]
=
"1"
;
usePmeQueue
=
isNvidia
;
if
(
usePmeQueue
)
{
if
(
usePmeQueue
)
{
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
...
...
platforms/opencl/src/kernels/pme.cl
View file @
99ed3ba7
...
@@ -138,7 +138,13 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -138,7 +138,13 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
pos.w*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
atom_add
(
&pmeGrid[index%2
==
0
?
index/2
:
(
index+GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
)
/2],
(
long
)
(
add*0x100000000
))
;
#
else
atom_add
(
&pmeGrid[index],
(
long
)
(
add*0x100000000
))
;
atom_add
(
&pmeGrid[index],
(
long
)
(
add*0x100000000
))
;
#
endif
}
}
}
}
}
}
...
@@ -149,7 +155,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
...
@@ -149,7 +155,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
real
scale
=
EPSILON_FACTOR/
(
real
)
0x100000000
;
real
scale
=
EPSILON_FACTOR/
(
real
)
0x100000000
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
long
value
=
fixedGrid[index%2
==
0
?
index/2
:
(
index+gridSize
)
/2]
;
#
else
long
value
=
fixedGrid[index]
;
long
value
=
fixedGrid[index]
;
#
endif
realGrid[index]
=
(
real
)
(
value*scale
)
;
realGrid[index]
=
(
real
)
(
value*scale
)
;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment