Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
125e52ae
Commit
125e52ae
authored
Sep 01, 2009
by
Peter Eastman
Browse files
Optimizations for PME
parent
3638f243
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
74 additions
and
38 deletions
+74
-38
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+7
-1
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-1
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+63
-36
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
125e52ae
...
@@ -359,12 +359,14 @@ struct cudaGmxSimulation {
...
@@ -359,12 +359,14 @@ struct cudaGmxSimulation {
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
int3
pmeGroupSize
;
// The dimensions of the groups used in charge spreading for PME
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
float
*
pPmeBsplineModuli
[
3
];
float
*
pPmeBsplineModuli
[
3
];
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineDtheta
;
float4
*
pPmeBsplineDtheta
;
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
int
*
pPmeInteractionFlags
;
// Flags for which groups of grid points interact with which atoms
unsigned
int
bonds
;
// Number of bonds
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
125e52ae
...
@@ -761,8 +761,12 @@ extern "C"
...
@@ -761,8 +761,12 @@ extern "C"
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
)
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
)
{
{
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
alphaEwald
=
alpha
;
int3
gridSize
=
make_int3
(
16
,
16
,
16
);
int3
gridSize
=
make_int3
(
32
,
32
,
32
);
gpu
->
sim
.
pmeGridSize
=
gridSize
;
gpu
->
sim
.
pmeGridSize
=
gridSize
;
int3
groupSize
=
make_int3
(
2
,
4
,
4
);
gpu
->
sim
.
pmeGroupSize
=
groupSize
;
const
int3
numGroups
=
make_int3
((
gridSize
.
x
+
groupSize
.
x
-
1
)
/
groupSize
.
x
,
(
gridSize
.
y
+
groupSize
.
y
-
1
)
/
groupSize
.
y
,
(
gridSize
.
z
+
groupSize
.
z
-
1
)
/
groupSize
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
cufftPlan3d
(
&
gpu
->
fftplan
,
gridSize
.
x
,
gridSize
.
y
,
gridSize
.
z
,
CUFFT_C2C
);
cufftPlan3d
(
&
gpu
->
fftplan
,
gridSize
.
x
,
gridSize
.
y
,
gridSize
.
z
,
CUFFT_C2C
);
gpu
->
psPmeGrid
=
new
CUDAStream
<
cufftComplex
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
,
1
,
"PmeGrid"
);
gpu
->
psPmeGrid
=
new
CUDAStream
<
cufftComplex
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
,
1
,
"PmeGrid"
);
gpu
->
sim
.
pPmeGrid
=
gpu
->
psPmeGrid
->
_pDevData
;
gpu
->
sim
.
pPmeGrid
=
gpu
->
psPmeGrid
->
_pDevData
;
...
@@ -780,6 +784,8 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha)
...
@@ -780,6 +784,8 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha)
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
gpu
->
psPmeInteractionFlags
=
new
CUDAStream
<
int
>
(
totalGroups
*
(
gpu
->
sim
.
paddedNumberOfAtoms
/
32
),
1
,
"PmeInteractionFlags"
);
gpu
->
sim
.
pPmeInteractionFlags
=
gpu
->
psPmeInteractionFlags
->
_pDevData
;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
125e52ae
...
@@ -112,6 +112,7 @@ struct _gpuContext {
...
@@ -112,6 +112,7 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
int
>*
psPmeInteractionFlags
;
// Flags for which groups of grid points interact with which atoms
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psBornForce
;
CUDAStream
<
float
>*
psBornForce
;
...
...
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
125e52ae
...
@@ -94,9 +94,6 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
...
@@ -94,9 +94,6 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
{
{
float4
ftmp
=
cSim
.
pPosq
[
i
];
float4
ftmp
=
cSim
.
pPosq
[
i
];
__syncthreads
();
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
...
@@ -104,17 +101,53 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
...
@@ -104,17 +101,53 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
/* avoid costly % operations if possible that is if dc_ngrid.* is pow. of 2 */
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
}
__syncthreads
();
// Compute flags for which atoms affect which groups of grid points.
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
cSim
.
pmeGroupSize
.
x
-
1
)
/
cSim
.
pmeGroupSize
.
x
,
(
cSim
.
pmeGridSize
.
y
+
cSim
.
pmeGroupSize
.
y
-
1
)
/
cSim
.
pmeGroupSize
.
y
,
(
cSim
.
pmeGridSize
.
z
+
cSim
.
pmeGroupSize
.
z
-
1
)
/
cSim
.
pmeGroupSize
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
const
float3
gridScale
=
make_float3
(
cSim
.
pmeGridSize
.
x
/
cSim
.
periodicBoxSizeX
,
cSim
.
pmeGridSize
.
y
/
cSim
.
periodicBoxSizeY
,
cSim
.
pmeGridSize
.
z
/
cSim
.
periodicBoxSizeZ
);
for
(
int
group
=
tid
;
group
<
totalGroups
;
group
+=
tnb
)
{
int3
gridBase
;
gridBase
.
x
=
group
/
(
numGroups
.
y
*
numGroups
.
z
);
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
x
*=
cSim
.
pmeGroupSize
.
x
;
gridBase
.
y
*=
cSim
.
pmeGroupSize
.
y
;
gridBase
.
z
*=
cSim
.
pmeGroupSize
.
z
;
unsigned
int
flags
=
0
;
unsigned
int
baseIndex
=
group
*
(
cSim
.
paddedNumberOfAtoms
/
32
);
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
{
// Decide if this block actually needs to be processed.
int
flagIndex
=
atomBlock
%
32
;
if
(
flagIndex
==
0
)
flags
=
0
;
float4
boxSize
=
cSim
.
pGridBoundingBox
[
atomBlock
];
float4
center
=
cSim
.
pGridCenter
[
atomBlock
];
int
maxx
=
(
int
)
ceil
((
center
.
x
+
boxSize
.
x
)
*
gridScale
.
x
)
+
cSim
.
pmeGroupSize
.
x
+
PME_ORDER
;
int
maxy
=
(
int
)
ceil
((
center
.
y
+
boxSize
.
y
)
*
gridScale
.
y
)
+
cSim
.
pmeGroupSize
.
y
+
PME_ORDER
;
int
maxz
=
(
int
)
ceil
((
center
.
z
+
boxSize
.
z
)
*
gridScale
.
z
)
+
cSim
.
pmeGroupSize
.
z
+
PME_ORDER
;
int
minx
=
(
int
)
floor
((
center
.
x
-
boxSize
.
x
)
*
gridScale
.
x
);
int
miny
=
(
int
)
floor
((
center
.
y
-
boxSize
.
y
)
*
gridScale
.
y
);
int
minz
=
(
int
)
floor
((
center
.
z
-
boxSize
.
z
)
*
gridScale
.
z
);
int
x
=
minx
+
(
gridBase
.
x
-
minx
)
%
cSim
.
pmeGridSize
.
x
;
int
y
=
miny
+
(
gridBase
.
y
-
miny
)
%
cSim
.
pmeGridSize
.
y
;
int
z
=
minz
+
(
gridBase
.
z
-
minz
)
%
cSim
.
pmeGridSize
.
z
;
if
(
maxx
<
x
||
maxy
<
y
||
maxz
<
z
)
flags
+=
1
<<
flagIndex
;
if
(
flagIndex
==
31
||
atomBlock
==
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
)
cSim
.
pPmeInteractionFlags
[
baseIndex
+
atomBlock
/
32
]
=
flags
;
}
}
}
}
}
...
@@ -140,8 +173,6 @@ __global__ void kUpdateBsplines_kernel()
...
@@ -140,8 +173,6 @@ __global__ void kUpdateBsplines_kernel()
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
__syncthreads
();
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
1
]
=
dr
;
data
[
1
]
=
dr
;
data
[
0
]
=
make_float4
(
1.0
f
)
-
dr
;
data
[
0
]
=
make_float4
(
1.0
f
)
-
dr
;
...
@@ -178,14 +209,11 @@ __global__ void kUpdateBsplines_kernel()
...
@@ -178,14 +209,11 @@ __global__ void kUpdateBsplines_kernel()
}
}
data
[
0
]
=
div_o
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
data
[
0
]
=
div_o
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
__syncthreads
();
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
{
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineTheta
[
i
+
j
*
cSim
.
atoms
]
=
data
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
+
j
*
cSim
.
atoms
]
=
ddata
[
j
];
}
}
__syncthreads
();
}
}
}
}
...
@@ -193,11 +221,9 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -193,11 +221,9 @@ __global__ void kGridSpreadCharge_kernel()
{
{
extern
__shared__
float
atomCharge
[];
extern
__shared__
float
atomCharge
[];
int4
*
atomGridIndex
=
(
int4
*
)
&
atomCharge
[
blockDim
.
x
];
int4
*
atomGridIndex
=
(
int4
*
)
&
atomCharge
[
blockDim
.
x
];
float4
*
bsplineTheta
=
(
float4
*
)
&
atomGridIndex
[
blockDim
.
x
];
const
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
const
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
const
int3
groupDim
=
make_int3
(
4
,
4
,
2
);
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
cSim
.
pmeGroupSize
.
x
-
1
)
/
cSim
.
pmeGroupSize
.
x
,
(
cSim
.
pmeGridSize
.
y
+
cSim
.
pmeGroupSize
.
y
-
1
)
/
cSim
.
pmeGroupSize
.
y
,
(
cSim
.
pmeGridSize
.
z
+
cSim
.
pmeGroupSize
.
z
-
1
)
/
cSim
.
pmeGroupSize
.
z
);
const
int3
numGroups
=
make_int3
((
cSim
.
pmeGridSize
.
x
+
groupDim
.
x
-
1
)
/
groupDim
.
x
,
(
cSim
.
pmeGridSize
.
y
+
groupDim
.
y
-
1
)
/
groupDim
.
y
,
(
cSim
.
pmeGridSize
.
z
+
groupDim
.
z
-
1
)
/
groupDim
.
z
);
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
const
unsigned
int
totalGroups
=
numGroups
.
x
*
numGroups
.
y
*
numGroups
.
z
;
unsigned
int
group
=
warp
*
totalGroups
/
totalWarps
;
unsigned
int
group
=
warp
*
totalGroups
/
totalWarps
;
const
unsigned
int
end
=
(
warp
+
1
)
*
totalGroups
/
totalWarps
;
const
unsigned
int
end
=
(
warp
+
1
)
*
totalGroups
/
totalWarps
;
...
@@ -205,7 +231,7 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -205,7 +231,7 @@ __global__ void kGridSpreadCharge_kernel()
while
(
group
<
end
)
while
(
group
<
end
)
{
{
// Process a group of grid points of size
groupDim
. First figure out the base index for the group,
// Process a group of grid points of size
cSim.pmeGroupSize
. First figure out the base index for the group,
// and the index of the specific point this thread will handle.
// and the index of the specific point this thread will handle.
int3
gridBase
;
int3
gridBase
;
...
@@ -213,14 +239,14 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -213,14 +239,14 @@ __global__ void kGridSpreadCharge_kernel()
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
int
remainder
=
group
-
gridBase
.
x
*
numGroups
.
y
*
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
y
=
remainder
/
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
z
=
remainder
-
gridBase
.
y
*
numGroups
.
z
;
gridBase
.
x
*=
groupDim
.
x
;
gridBase
.
x
*=
cSim
.
pmeGroupSize
.
x
;
gridBase
.
y
*=
groupDim
.
y
;
gridBase
.
y
*=
cSim
.
pmeGroupSize
.
y
;
gridBase
.
z
*=
groupDim
.
z
;
gridBase
.
z
*=
cSim
.
pmeGroupSize
.
z
;
int3
gridPoint
;
int3
gridPoint
;
gridPoint
.
x
=
index
/
(
groupDim
.
y
*
groupDim
.
z
);
gridPoint
.
x
=
index
/
(
cSim
.
pmeGroupSize
.
y
*
cSim
.
pmeGroupSize
.
z
);
remainder
=
index
-
gridPoint
.
x
*
groupDim
.
y
*
groupDim
.
z
;
remainder
=
index
-
gridPoint
.
x
*
cSim
.
pmeGroupSize
.
y
*
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
y
=
remainder
/
groupDim
.
z
;
gridPoint
.
y
=
remainder
/
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
groupDim
.
z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
cSim
.
pmeGroupSize
.
z
;
gridPoint
.
x
+=
gridBase
.
x
;
gridPoint
.
x
+=
gridBase
.
x
;
gridPoint
.
y
+=
gridBase
.
y
;
gridPoint
.
y
+=
gridBase
.
y
;
gridPoint
.
z
+=
gridBase
.
z
;
gridPoint
.
z
+=
gridBase
.
z
;
...
@@ -228,17 +254,22 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -228,17 +254,22 @@ __global__ void kGridSpreadCharge_kernel()
// Loop over blocks of atoms.
// Loop over blocks of atoms.
float
result
=
0.0
f
;
float
result
=
0.0
f
;
int
flags
=
0
;
unsigned
int
baseIndex
=
group
*
(
cSim
.
paddedNumberOfAtoms
/
32
);
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
for
(
int
atomBlock
=
0
;
atomBlock
<
cSim
.
paddedNumberOfAtoms
>>
GRIDBITS
;
atomBlock
++
)
{
{
// Decide if this block actually needs to be processed.
int
flagIndex
=
atomBlock
%
32
;
if
(
flagIndex
==
0
)
flags
=
cSim
.
pPmeInteractionFlags
[
baseIndex
+
atomBlock
/
32
];
if
((
flags
&
(
1
<<
flagIndex
))
!=
0
)
continue
;
int
atomIndex
=
(
atomBlock
<<
GRIDBITS
)
+
index
;
int
atomIndex
=
(
atomBlock
<<
GRIDBITS
)
+
index
;
if
(
atomIndex
<
cSim
.
atoms
)
if
(
atomIndex
<
cSim
.
atoms
)
{
{
atomCharge
[
threadIdx
.
x
]
=
cSim
.
pPosq
[
atomIndex
].
w
;
atomCharge
[
threadIdx
.
x
]
=
cSim
.
pPosq
[
atomIndex
].
w
;
atomGridIndex
[
threadIdx
.
x
]
=
cSim
.
pPmeParticleIndex
[
atomIndex
];
atomGridIndex
[
threadIdx
.
x
]
=
cSim
.
pPmeParticleIndex
[
atomIndex
];
// bsplineTheta[threadIdx.x] = cSim.pPmeBsplineTheta[atomIndex];
// bsplineTheta[threadIdx.x+blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+cSim.atoms];
// bsplineTheta[threadIdx.x+2*blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+2*cSim.atoms];
// bsplineTheta[threadIdx.x+3*blockDim.x] = cSim.pPmeBsplineTheta[atomIndex+3*cSim.atoms];
}
}
int
maxAtoms
=
min
(
GRID
,
cSim
.
atoms
-
(
atomBlock
<<
GRIDBITS
));
int
maxAtoms
=
min
(
GRID
,
cSim
.
atoms
-
(
atomBlock
<<
GRIDBITS
));
for
(
int
i
=
0
;
i
<
maxAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
maxAtoms
;
i
++
)
...
@@ -248,14 +279,10 @@ __global__ void kGridSpreadCharge_kernel()
...
@@ -248,14 +279,10 @@ __global__ void kGridSpreadCharge_kernel()
int
ix
=
gridPoint
.
x
-
atomGridIndex
[
localIndex
].
x
;
int
ix
=
gridPoint
.
x
-
atomGridIndex
[
localIndex
].
x
;
int
iy
=
gridPoint
.
y
-
atomGridIndex
[
localIndex
].
y
;
int
iy
=
gridPoint
.
y
-
atomGridIndex
[
localIndex
].
y
;
int
iz
=
gridPoint
.
z
-
atomGridIndex
[
localIndex
].
z
;
int
iz
=
gridPoint
.
z
-
atomGridIndex
[
localIndex
].
z
;
if
(
ix
<
0
)
ix
+=
(
ix
<
0
?
cSim
.
pmeGridSize
.
x
:
0
);
ix
+=
cSim
.
pmeGridSize
.
x
;
iy
+=
(
iy
<
0
?
cSim
.
pmeGridSize
.
y
:
0
);
if
(
iy
<
0
)
iz
+=
(
iz
<
0
?
cSim
.
pmeGridSize
.
z
:
0
);
iy
+=
cSim
.
pmeGridSize
.
y
;
if
(
iz
<
0
)
iz
+=
cSim
.
pmeGridSize
.
z
;
if
(
ix
<
PME_ORDER
&&
iy
<
PME_ORDER
&&
iz
<
PME_ORDER
)
if
(
ix
<
PME_ORDER
&&
iy
<
PME_ORDER
&&
iz
<
PME_ORDER
)
// result += atomCharge[threadIdx.x-index+i]*bsplineTheta[localIndex+ix*blockDim.x].x*bsplineTheta[localIndex+iy*blockDim.x].y*bsplineTheta[localIndex+iz*blockDim.x].z;
result
+=
atomCharge
[
threadIdx
.
x
-
index
+
i
]
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
ix
*
cSim
.
atoms
].
x
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iy
*
cSim
.
atoms
].
y
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iz
*
cSim
.
atoms
].
z
;
result
+=
atomCharge
[
threadIdx
.
x
-
index
+
i
]
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
ix
*
cSim
.
atoms
].
x
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iy
*
cSim
.
atoms
].
y
*
cSim
.
pPmeBsplineTheta
[
atomIndex
+
iz
*
cSim
.
atoms
].
z
;
}
}
}
}
...
@@ -346,7 +373,7 @@ void kCalculatePME(gpuContext gpu)
...
@@ -346,7 +373,7 @@ void kCalculatePME(gpuContext gpu)
unsigned
int
threads
=
16380
/
(
2
*
PME_ORDER
*
sizeof
(
float4
));
unsigned
int
threads
=
16380
/
(
2
*
PME_ORDER
*
sizeof
(
float4
));
kUpdateBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
2
*
threads
*
PME_ORDER
*
sizeof
(
float4
)
>>>
();
kUpdateBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
2
*
threads
*
PME_ORDER
*
sizeof
(
float4
)
>>>
();
LAUNCHERROR
(
"kUpdateBsplines"
);
LAUNCHERROR
(
"kUpdateBsplines"
);
kGridSpreadCharge_kernel
<<<
gpu
->
sim
.
blocks
,
64
,
64
*
(
sizeof
(
float
)
+
sizeof
(
int4
)
+
4
*
sizeof
(
float4
)
)
>>>
();
kGridSpreadCharge_kernel
<<<
gpu
->
sim
.
blocks
,
64
,
64
*
(
sizeof
(
float
)
+
sizeof
(
int4
))
>>>
();
LAUNCHERROR
(
"kGridSpreadCharge"
);
LAUNCHERROR
(
"kGridSpreadCharge"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment