Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
125e52ae
"wrappers/python/vscode:/vscode.git/clone" did not exist on "f2fc1353da2cedf074fc2fd6d5c8ff472ebb51f8"
Commit
125e52ae
authored
Sep 01, 2009
by
Peter Eastman
Browse files
Optimizations for PME
parent
3638f243
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
74 additions
and
38 deletions
+74
-38
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+7
-1
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-1
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+63
-36
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
125e52ae
...
@@ -359,12 +359,14 @@ struct cudaGmxSimulation {
...
@@ -359,12 +359,14 @@ struct cudaGmxSimulation {
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
int3
pmeGroupSize
;
// The dimensions of the groups used in charge spreading for PME
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
float
*
pPmeBsplineModuli
[
3
];
float
*
pPmeBsplineModuli
[
3
];
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineDtheta
;
float4
*
pPmeBsplineDtheta
;
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
int
*
pPmeInteractionFlags
;
// Flags for which groups of grid points interact with which atoms
unsigned
int
bonds
;
// Number of bonds
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
125e52ae
...
@@ -761,8 +761,12 @@ extern "C"
...
@@ -761,8 +761,12 @@ extern "C"
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
)
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
)
{
{
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
alphaEwald
=
alpha
;
int3
gridSize
=
make_int3
(
16
,
16
,
16
);
int3
gridSize
=
make_int3
(
32
,
32
,
32
);
gpu
->
sim
.
pmeGridSize
=
gridSize
;
gpu
->
sim
.
pmeGridSize
=
gridSize
;
int3
groupSize
=
make_int3
(
2
,
4
,
4
);
gpu
->
sim
.
pmeGroupSize
=
groupSize
;
const
int3
numGroups
=
make_int3
((
gridSize
.
x
+
groupSize
.
x
-
1
)
/
groupSize
.
x
,
(
gridSize
.
y
+
groupSize
.
y
-
1
)
/
groupSize
.
y
,
(
gridSize
.
z
+
groupSize
.
z
-
1
)
/
groupSize
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
cufftPlan3d
(
&
gpu
->
fftplan
,
gridSize
.
x
,
gridSize
.
y
,
gridSize
.
z
,
CUFFT_C2C
);
cufftPlan3d
(
&
gpu
->
fftplan
,
gridSize
.
x
,
gridSize
.
y
,
gridSize
.
z
,
CUFFT_C2C
);
gpu
->
psPmeGrid
=
new
CUDAStream
<
cufftComplex
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
,
1
,
"PmeGrid"
);
gpu
->
psPmeGrid
=
new
CUDAStream
<
cufftComplex
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
,
1
,
"PmeGrid"
);
gpu
->
sim
.
pPmeGrid
=
gpu
->
psPmeGrid
->
_pDevData
;
gpu
->
sim
.
pPmeGrid
=
gpu
->
psPmeGrid
->
_pDevData
;
...
@@ -780,6 +784,8 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha)
...
@@ -780,6 +784,8 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha)
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
gpu
->
psPmeInteractionFlags
=
new
CUDAStream
<
int
>
(
totalGroups
*
(
gpu
->
sim
.
paddedNumberOfAtoms
/
32
),
1
,
"PmeInteractionFlags"
);
gpu
->
sim
.
pPmeInteractionFlags
=
gpu
->
psPmeInteractionFlags
->
_pDevData
;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
125e52ae
...
@@ -110,8 +110,9 @@ struct _gpuContext {
...
@@ -110,8 +110,9 @@ struct _gpuContext {
CUDAStream
<
float
>*
psPmeBsplineModuli
[
3
];
CUDAStream
<
float
>*
psPmeBsplineModuli
[
3
];
CUDAStream
<
float4
>*
psPmeBsplineTheta
;
CUDAStream
<
float4
>*
psPmeBsplineTheta
;
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
int
>*
psPmeInteractionFlags
;
// Flags for which groups of grid points interact with which atoms
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psBornForce
;
CUDAStream
<
float
>*
psBornForce
;
...
...
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
125e52ae
...
@@ -94,9 +94,6 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
...
@@ -94,9 +94,6 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
{
{
float4
ftmp
=
cSim
.
pPosq
[
i
];
float4
ftmp
=
cSim
.
pPosq
[
i
];
__syncthreads
();
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
...
@@ -104,17 +101,53 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
...
@@ -104,17 +101,53 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
/* avoid costly % operations if possible that is if dc_ngrid.* is pow. of 2 */
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
}
// Compute flags for which atoms affect which groups of grid points.
__syncthreads
();
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
cSim
.
pmeGroupSize
.
x
-
1
)
/
cSim
.
pmeGroupSize
.
x
,
(
cSim
.
pmeGridSize
.
y
+
cSim
.
pmeGroupSize
.
y
-
1
)
/
cSim
.
pmeGroupSize
.
y
,
(
cSim
.
pmeGridSize
.
z
+
cSim
.
pmeGroupSize
.
z
-
1
)
/
cSim
.
pmeGroupSize
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
const
float3
gridScale
=
make_float3
(
cSim
.
pmeGridSize
.
x
/
cSim
.
periodicBoxSizeX
,
cSim
.
pmeGridSize
.
y
/
cSim
.
periodicBoxSizeY
,
cSim
.
pmeGridSize
.
z
/
cSim
.
periodicBoxSizeZ
);
for
(
int
group
=
tid
;
group
<
totalGroups
;
group
+=
tnb
)
{
int3
gridBase
;
gridBase
.
x
=
group
/
(
numGroups
.
y
*
numGroups
.
z
);
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
x
*=
cSim
.
pmeGroupSize
.
x
;
gridBase
.
y
*=
cSim
.
pmeGroupSize
.
y
;
gridBase
.
z
*=
cSim
.
pmeGroupSize
.
z
;
unsigned
int
flags
=
0
;
unsigned
int
baseIndex
=
group
*
(
cSim
.
paddedNumberOfAtoms
/
32
);
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
{
// Decide if this block actually needs to be processed.
int
flagIndex
=
atomBlock
%
32
;
if
(
flagIndex
==
0
)
flags
=
0
;
float4
boxSize
=
cSim
.
pGridBoundingBox
[
atomBlock
];
float4
center
=
cSim
.
pGridCenter
[
atomBlock
];
int
maxx
=
(
int
)
ceil
((
center
.
x
+
boxSize
.
x
)
*
gridScale
.
x
)
+
cSim
.
pmeGroupSize
.
x
+
PME_ORDER
;
int
maxy
=
(
int
)
ceil
((
center
.
y
+
boxSize
.
y
)
*
gridScale
.
y
)
+
cSim
.
pmeGroupSize
.
y
+
PME_ORDER
;
int
maxz
=
(
int
)
ceil
((
center
.
z
+
boxSize
.
z
)
*
gridScale
.
z
)
+
cSim
.
pmeGroupSize
.
z
+
PME_ORDER
;
int
minx
=
(
int
)
floor
((
center
.
x
-
boxSize
.
x
)
*
gridScale
.
x
);
int
miny
=
(
int
)
floor
((
center
.
y
-
boxSize
.
y
)
*
gridScale
.
y
);
int
minz
=
(
int
)
floor
((
center
.
z
-
boxSize
.
z
)
*
gridScale
.
z
);
int
x
=
minx
+
(
gridBase
.
x
-
minx
)
%
cSim
.
pmeGridSize
.
x
;
int
y
=
miny
+
(
gridBase
.
y
-
miny
)
%
cSim
.
pmeGridSize
.
y
;
int
z
=
minz
+
(
gridBase
.
z
-
minz
)
%
cSim
.
pmeGridSize
.
z
;
if
(
maxx
<
x
||
maxy
<
y
||
maxz
<
z
)
flags
+=
1
<<
flagIndex
;
if
(
flagIndex
==
31
||
atomBlock
==
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
)
cSim
.
pPmeInteractionFlags
[
baseIndex
+
atomBlock
/
32
]
=
flags
;
}
}
}
}
}
...
@@ -140,8 +173,6 @@ __global__ void kUpdateBsplines_kernel()
...
@@ -140,8 +173,6 @@ __global__ void kUpdateBsplines_kernel()
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
__syncthreads
();
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
1
]
=
dr
;
data
[
1
]
=
dr
;
data
[
0
]
=
make_float4
(
1.0
f
)
-
dr
;
data
[
0
]
=
make_float4
(
1.0
f
)
-
dr
;
...
@@ -178,14 +209,11 @@ __global__ void kUpdateBsplines_kernel()
...
@@ -178,14 +209,11 @@ __global__ void kUpdateBsplines_kernel()
}
}
data
[
0
]
=
div_o
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
data
[
0
]
=
div_o
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
__syncthreads
();
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
{
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
}
}
__syncthreads
();
}
}
}
}
...
@@ -193,11 +221,9 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -193,11 +221,9 @@ __global__ void kGridSpreadCharge_kernel()
{
{
extern
__shared__
float
atomCharge
[];
extern
__shared__
float
atomCharge
[];
int4
*
atomGridIndex
=
(
int4
*
)
&
atomCharge
[
blockDim
.
x
];
int4
*
atomGridIndex
=
(
int4
*
)
&
atomCharge
[
blockDim
.
x
];
float4
*
bsplineTheta
=
(
float4
*
)
&
atomGridIndex
[
blockDim
.
x
];
const
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
const
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
const
int3
groupDim
=
make_int3
(
4
,
4
,
2
);
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
cSim
.
pmeGroupSize
.
x
-
1
)
/
cSim
.
pmeGroupSize
.
x
,
(
cSim
.
pmeGridSize
.
y
+
cSim
.
pmeGroupSize
.
y
-
1
)
/
cSim
.
pmeGroupSize
.
y
,
(
cSim
.
pmeGridSize
.
z
+
cSim
.
pmeGroupSize
.
z
-
1
)
/
cSim
.
pmeGroupSize
.
z
);
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
groupDim
.
x
-
1
)
/
groupDim
.
x
,
(
cSim
.
pmeGridSize
.
y
+
groupDim
.
y
-
1
)
/
groupDim
.
y
,
(
cSim
.
pmeGridSize
.
z
+
groupDim
.
z
-
1
)
/
groupDim
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
unsigned
int
group
=
warp
*
totalGroups
/
totalWarps
;
unsigned
int
group
=
warp
*
totalGroups
/
totalWarps
;
const
unsigned
int
end
=
(
warp
+
1
)
*
totalGroups
/
totalWarps
;
const
unsigned
int
end
=
(
warp
+
1
)
*
totalGroups
/
totalWarps
;
...
@@ -205,7 +231,7 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -205,7 +231,7 @@ __global__ void kGridSpreadCharge_kernel()
while
(
group
<
end
)
while
(
group
<
end
)
{
{
// Process a group of grid points of size
groupDim
. First figure out the base index for the group,
// Process a group of grid points of size
cSim.pmeGroupSize
. First figure out the base index for the group,
// and the index of the specific point this thread will handle.
// and the index of the specific point this thread will handle.
int3
gridBase
;
int3
gridBase
;
...
@@ -213,14 +239,14 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -213,14 +239,14 @@ __global__ void kGridSpreadCharge_kernel()
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
x
*=
groupDim
.
x
;
gridBase
.
x
*=
cSim
.
pmeGroupSize
.
x
;
gridBase
.
y
*=
groupDim
.
y
;
gridBase
.
y
*=
cSim
.
pmeGroupSize
.
y
;
gridBase
.
z
*=
groupDim
.
z
;
gridBase
.
z
*=
cSim
.
pmeGroupSize
.
z
;
int3
gridPoint
;
int3
gridPoint
;
gridPoint
.
x
=
index
/
(
groupDim
.
y
*
groupDim
.
z
);
gridPoint
.
x
=
index
/
(
cSim
.
pmeGroupSize
.
y
*
cSim
.
pmeGroupSize
.
z
);
remainder
=
index
-
gridPoint
.
x
*
groupDim
.
y
*
groupDim
.
z
;
remainder
=
index
-
gridPoint
.
x
*
cSim
.
pmeGroupSize
.
y
*
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
y
=
remainder
/
groupDim
.
z
;
gridPoint
.
y
=
remainder
/
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
groupDim
.
z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
x
+=
gridBase
.
x
;
gridPoint
.
x
+=
gridBase
.
x
;
gridPoint
.
y
+=
gridBase
.
y
;
gridPoint
.
y
+=
gridBase
.
y
;
gridPoint
.
z
+=
gridBase
.
z
;
gridPoint
.
z
+=
gridBase
.
z
;
...
@@ -228,17 +254,22 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -228,17 +254,22 @@ __global__ void kGridSpreadCharge_kernel()
// Loop over blocks of atoms.
// Loop over blocks of atoms.
float
result
=
0.0
f
;
float
result
=
0.0
f
;
int
flags
=
0
;
unsigned
int
baseIndex
=
group
*
(
cSim
.
paddedNumberOfAtoms
/
32
);
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
{
{
// Decide if this block actually needs to be processed.
int
flagIndex
=
atomBlock
%
32
;
if
(
flagIndex
==
0
)
flags
=
cSim
.
pPmeInteractionFlags
[
baseIndex
+
atomBlock
/
32
];
if
((
flags
&
(
1
<<
flagIndex
))
!=
0
)
continue
;
int
atomIndex
=
(
atomBlock
<<
GRIDBITS
)
+
index
;
int
atomIndex
=
(
atomBlock
<<
GRIDBITS
)
+
index
;
if
(
atomIndex
<
cSim
.
atoms
)
if
(
atomIndex
<
cSim
.
atoms
)
{
{
atomCharge
[
threadIdx
.
x
]
=
cSim
.
pPosq
[
atomIndex
].
w
;
atomCharge
[
threadIdx
.
x
]
=
cSim
.
pPosq
[
atomIndex
].
w
;
atomGridIndex
[
threadIdx
.
x
]
=
cSim
.
pPmeParticleIndex
[
atomIndex
];
atomGridIndex
[
threadIdx
.
x
]
=
cSim
.
pPmeParticleIndex
[
atomIndex
];
// bsplineTheta[threadIdx.x] = cSim.pPmeBsplineTheta[atomIndex];
// bsplineTheta[threadIdx.x+blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+cSim.atoms];
// bsplineTheta[threadIdx.x+2*blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+2*cSim.atoms];
// bsplineTheta[threadIdx.x+3*blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+3*cSim.atoms];
}
}
int
maxAtoms
=
min
(
GRID
,
cSim
.
atoms
-
(
atomBlock
<<
GRIDBITS
));
int
maxAtoms
=
min
(
GRID
,
cSim
.
atoms
-
(
atomBlock
<<
GRIDBITS
));
for
(
int
i
=
0
;
i
<
maxAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
maxAtoms
;
i
++
)
...
@@ -248,14 +279,10 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -248,14 +279,10 @@ __global__ void kGridSpreadCharge_kernel()
int
ix
=
gridPoint
.
x
-
atomGridIndex
[
localIndex
].
x
;
int
ix
=
gridPoint
.
x
-
atomGridIndex
[
localIndex
].
x
;
int
iy
=
gridPoint
.
y
-
atomGridIndex
[
localIndex
].
y
;
int
iy
=
gridPoint
.
y
-
atomGridIndex
[
localIndex
].
y
;
int
iz
=
gridPoint
.
z
-
atomGridIndex
[
localIndex
].
z
;
int
iz
=
gridPoint
.
z
-
atomGridIndex
[
localIndex
].
z
;
if
(
ix
<
0
)
ix
+=
(
ix
<
0
?
cSim
.
pmeGridSize
.
x
:
0
);
ix
+=
cSim
.
pmeGridSize
.
x
;
iy
+=
(
iy
<
0
?
cSim
.
pmeGridSize
.
y
:
0
);
if
(
iy
<
0
)
iz
+=
(
iz
<
0
?
cSim
.
pmeGridSize
.
z
:
0
);
iy
+=
cSim
.
pmeGridSize
.
y
;
if
(
iz
<
0
)
iz
+=
cSim
.
pmeGridSize
.
z
;
if
(
ix
<
PME_ORDER
&&
iy
<
PME_ORDER
&&
iz
<
PME_ORDER
)
if
(
ix
<
PME_ORDER
&&
iy
<
PME_ORDER
&&
iz
<
PME_ORDER
)
// result += atomCharge[threadIdx.x-index+i]*bsplineTheta[localIndex+ix*blockDim.x].x*bsplineTheta[localIndex+iy*blockDim.x].y*bsplineTheta[localIndex+iz*blockDim.x].z;
result
+=
atomCharge
[
threadIdx
.
x
-
index
+
i
]
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
ix
*
cSim
.
atoms
].
x
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iy
*
cSim
.
atoms
].
y
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iz
*
cSim
.
atoms
].
z
;
result
+=
atomCharge
[
threadIdx
.
x
-
index
+
i
]
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
ix
*
cSim
.
atoms
].
x
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iy
*
cSim
.
atoms
].
y
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iz
*
cSim
.
atoms
].
z
;
}
}
}
}
...
@@ -346,7 +373,7 @@ void kCalculatePME(gpuContext gpu)
...
@@ -346,7 +373,7 @@ void kCalculatePME(gpuContext gpu)
unsigned
int
threads
=
16380
/
(
2
*
PME_ORDER
*
sizeof
(
float4
));
unsigned
int
threads
=
16380
/
(
2
*
PME_ORDER
*
sizeof
(
float4
));
kUpdateBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
2
*
threads
*
PME_ORDER
*
sizeof
(
float4
)
>>>
();
kUpdateBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
2
*
threads
*
PME_ORDER
*
sizeof
(
float4
)
>>>
();
LAUNCHERROR
(
"kUpdateBsplines"
);
LAUNCHERROR
(
"kUpdateBsplines"
);
kGridSpreadCharge_kernel
<<<
gpu
->
sim
.
blocks
,
64
,
64
*
(
sizeof
(
float
)
+
sizeof
(
int4
)
+
4
*
sizeof
(
float4
)
)
>>>
();
kGridSpreadCharge_kernel
<<<
gpu
->
sim
.
blocks
,
64
,
64
*
(
sizeof
(
float
)
+
sizeof
(
int4
))
>>>
();
LAUNCHERROR
(
"kGridSpreadCharge"
);
LAUNCHERROR
(
"kGridSpreadCharge"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment