Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d927ff49
"platforms/common/vscode:/vscode.git/clone" did not exist on "c1d643e2fbfbcbe4c84ebabb05738c34c7647d9e"
Commit d927ff49 authored Aug 19, 2016 by Peter Eastman
Browse files
Minor optimizations
parent
27550631
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 25 additions and 3 deletions
+25
-3
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+1
-1
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+12
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+12
-1
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
d927ff49
...
...
@@ -205,6 +205,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int
major
,
minor
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
int
numThreadBlocksPerComputeUnit
=
(
major
>=
6
?
4
:
6
);
#if __CUDA_API_VERSION < 7000
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support
...
...
@@ -241,7 +242,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks
=
(
paddedNumAtoms
+
(
TileSize
-
1
))
/
TileSize
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
int
numThreadBlocksPerComputeUnit
=
6
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
d927ff49
...
...
@@ -27,8 +27,18 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
maxPos
=
make_real4
(
max
(
maxPos
.
x
,
pos
.
x
),
max
(
maxPos
.
y
,
pos
.
y
),
max
(
maxPos
.
z
,
pos
.
z
),
0
);
}
real4
blockSize
=
0.5
f
*
(
maxPos
-
minPos
);
real4
center
=
0.5
f
*
(
maxPos
+
minPos
);
blockSize
.
w
=
0
;
for
(
int
i
=
base
+
1
;
i
<
last
;
i
++
)
{
pos
=
posq
[
i
];
real4
delta
=
posq
[
i
]
-
center
;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
blockSize
.
w
=
max
(
blockSize
.
w
,
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
}
blockBoundingBox
[
index
]
=
blockSize
;
blockCenter
[
index
]
=
0.5
f
*
(
maxPos
+
minPos
)
;
blockCenter
[
index
]
=
center
;
sortedBlocks
[
index
]
=
make_real2
(
blockSize
.
x
+
blockSize
.
y
+
blockSize
.
z
,
index
);
index
+=
blockDim
.
x
*
gridDim
.
x
;
base
=
index
*
TILE_SIZE
;
...
...
@@ -182,6 +192,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#endif
includeBlock2
&=
(
blockDelta
.
x
*
blockDelta
.
x
+
blockDelta
.
y
*
blockDelta
.
y
+
blockDelta
.
z
*
blockDelta
.
z
<
(
PADDED_CUTOFF
+
blockSizeX
.
w
+
blockSizeY
.
w
)
*
(
PADDED_CUTOFF
+
blockSizeX
.
w
+
blockSizeY
.
w
));
blockDelta
.
x
=
max
(
0.0
f
,
fabs
(
blockDelta
.
x
)
-
blockSizeX
.
x
-
blockSizeY
.
x
);
blockDelta
.
y
=
max
(
0.0
f
,
fabs
(
blockDelta
.
y
)
-
blockSizeX
.
y
-
blockSizeY
.
y
);
blockDelta
.
z
=
max
(
0.0
f
,
fabs
(
blockDelta
.
z
)
-
blockSizeX
.
z
-
blockSizeY
.
z
);
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
d927ff49
...
...
@@ -27,8 +27,18 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
maxPos
=
max
(
maxPos,
pos
)
;
}
real4
blockSize
=
0.5f*
(
maxPos-minPos
)
;
real4
center
=
0.5f*
(
maxPos+minPos
)
;
blockSize.w
=
0
;
for
(
int
i
=
base+1
; i < last; i++) {
pos
=
posq[i]
;
real4
delta
=
posq[i]-center
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
blockSize.w
=
max
(
blockSize.w,
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
)
;
}
blockBoundingBox[index]
=
blockSize
;
blockCenter[index]
=
0.5f*
(
maxPos+minPos
)
;
blockCenter[index]
=
center
;
sortedBlocks[index]
=
(
real2
)
(
blockSize.x+blockSize.y+blockSize.z,
index
)
;
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
...
...
@@ -142,6 +152,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#
endif
includeBlock2
&=
(
blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z
<
(
PADDED_CUTOFF+blockSizeX.w+blockSizeY.w
)
*
(
PADDED_CUTOFF+blockSizeX.w+blockSizeY.w
))
;
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSizeX.x-blockSizeY.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSizeX.y-blockSizeY.y
)
;
blockDelta.z
=
max
((
real
)
0
,
fabs
(
blockDelta.z
)
-blockSizeX.z-blockSizeY.z
)
;
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment