Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6c6d68d2
Commit
6c6d68d2
authored
May 26, 2016
by
peastman
Browse files
Merge pull request #1499 from peastman/reallocneighbors
Improved method for resizing neighbor list
parents
c2be00a6
6cc2edd1
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
138 additions
and
146 deletions
+138
-146
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+28
-30
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+22
-24
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
...eba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+22
-23
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
.../amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
+22
-23
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+22
-23
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
.../platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+22
-23
No files found.
platforms/opencl/src/kernels/nonbonded.cl
View file @
6c6d68d2
...
@@ -200,6 +200,8 @@ __kernel void computeNonbonded(
...
@@ -200,6 +200,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long)numTileIndices/totalWarps : warp*(long)numTiles/totalWarps);
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long)numTileIndices/totalWarps : warp*(long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long)numTileIndices/totalWarps : (warp+1)*(long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long)numTileIndices/totalWarps : (warp+1)*(long)numTiles/totalWarps);
#else
#else
...
@@ -223,42 +225,38 @@ __kernel void computeNonbonded(
...
@@ -223,42 +225,38 @@ __kernel void computeNonbonded(
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
}
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{
y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
}
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS;
ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
}
while (skipTiles[currentSkipIndex] < pos)
else
currentSkipIndex++;
skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos);
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
unsigned int atom1 = x*TILE_SIZE + tgx;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
6c6d68d2
...
@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
...
@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
#else
#else
...
@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
...
@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
}
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{
y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
}
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) {
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
}
}
includeTile = (nextToSkip != pos);
else
nextToSkip = end;
}
}
includeTile = (nextToSkip != pos);
#endif
if (includeTile) {
if (includeTile) {
// Load the data for this tile.
// Load the data for this tile.
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
View file @
6c6d68d2
...
@@ -488,6 +488,8 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -488,6 +488,8 @@ extern "C" __global__ void computeElectrostatics(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -508,34 +510,31 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -508,34 +510,31 @@ extern "C" __global__ void computeElectrostatics(
int
x
,
y
;
int
x
,
y
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
View file @
6c6d68d2
...
@@ -592,6 +592,8 @@ extern "C" __global__ void computeFixedField(
...
@@ -592,6 +592,8 @@ extern "C" __global__ void computeFixedField(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -612,34 +614,31 @@ extern "C" __global__ void computeFixedField(
...
@@ -612,34 +614,31 @@ extern "C" __global__ void computeFixedField(
int
x
,
y
;
int
x
,
y
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
6c6d68d2
...
@@ -454,6 +454,8 @@ extern "C" __global__ void computeInducedField(
...
@@ -454,6 +454,8 @@ extern "C" __global__ void computeInducedField(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -474,34 +476,31 @@ extern "C" __global__ void computeInducedField(
...
@@ -474,34 +476,31 @@ extern "C" __global__ void computeInducedField(
int
x
,
y
;
int
x
,
y
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
6c6d68d2
...
@@ -555,6 +555,8 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -555,6 +555,8 @@ extern "C" __global__ void computeElectrostatics(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -575,34 +577,31 @@ extern "C" __global__ void computeElectrostatics(
...
@@ -575,34 +577,31 @@ extern "C" __global__ void computeElectrostatics(
int
x
,
y
;
int
x
,
y
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment