Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d5cc0fba
Commit
d5cc0fba
authored
Sep 09, 2009
by
Peter Eastman
Browse files
Reduced memory use for PME.
parent
dc677fac
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
26 additions
and
35 deletions
+26
-35
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+0
-2
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+0
-8
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-2
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+26
-23
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
d5cc0fba
...
...
@@ -367,8 +367,6 @@ struct cudaGmxSimulation {
float
*
pPmeBsplineModuli
[
3
];
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineDtheta
;
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
int
*
pPmeAtomRange
;
// The range of sorted atoms at each grid point
float2
*
pPmeAtomGridIndex
;
// The grid point each atom is at
unsigned
int
bonds
;
// Number of bonds
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
d5cc0fba
...
...
@@ -795,10 +795,6 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSiz
gpu
->
sim
.
pPmeBsplineTheta
=
gpu
->
psPmeBsplineTheta
->
_pDevData
;
gpu
->
psPmeBsplineDtheta
=
new
CUDAStream
<
float4
>
(
PME_ORDER
*
gpu
->
natoms
,
1
,
"PmeBsplineDtheta"
);
gpu
->
sim
.
pPmeBsplineDtheta
=
gpu
->
psPmeBsplineDtheta
->
_pDevData
;
gpu
->
psPmeParticleIndex
=
new
CUDAStream
<
int4
>
(
gpu
->
natoms
,
1
,
"PmeParticleIndex"
);
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
gpu
->
psPmeAtomRange
=
new
CUDAStream
<
int
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
+
1
,
1
,
"PmeAtomRange"
);
gpu
->
sim
.
pPmeAtomRange
=
gpu
->
psPmeAtomRange
->
_pDevData
;
gpu
->
psPmeAtomGridIndex
=
new
CUDAStream
<
float2
>
(
gpu
->
natoms
,
1
,
"PmeAtomGridIndex"
);
...
...
@@ -1703,8 +1699,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
psPmeBsplineModuli
[
2
]
=
NULL
;
gpu
->
psPmeBsplineTheta
=
NULL
;
gpu
->
psPmeBsplineDtheta
=
NULL
;
gpu
->
psPmeParticleIndex
=
NULL
;
gpu
->
psPmeParticleFraction
=
NULL
;
gpu
->
psPmeAtomRange
=
NULL
;
gpu
->
psPmeAtomGridIndex
=
NULL
;
gpu
->
psShakeID
=
NULL
;
...
...
@@ -1872,8 +1866,6 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psPmeBsplineModuli
[
2
];
delete
gpu
->
psPmeBsplineTheta
;
delete
gpu
->
psPmeBsplineDtheta
;
delete
gpu
->
psPmeParticleIndex
;
delete
gpu
->
psPmeParticleFraction
;
delete
gpu
->
psPmeAtomRange
;
delete
gpu
->
psPmeAtomGridIndex
;
delete
gpu
->
psTabulatedErfc
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
d5cc0fba
...
...
@@ -111,8 +111,6 @@ struct _gpuContext {
CUDAStream
<
float
>*
psPmeBsplineModuli
[
3
];
CUDAStream
<
float4
>*
psPmeBsplineTheta
;
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
int
>*
psPmeAtomRange
;
// The range of sorted atoms at each grid point
CUDAStream
<
float2
>*
psPmeAtomGridIndex
;
// The grid point each atom is at
CUDAStream
<
float2
>*
psObcData
;
...
...
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
d5cc0fba
...
...
@@ -32,8 +32,8 @@ using namespace std;
static
__constant__
cudaGmxSimulation
cSim
;
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
void
SetCalculatePMESim
(
gpuContext
gpu
)
{
...
...
@@ -99,20 +99,14 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
{
float4
ftmp
=
cSim
.
pPosq
[
i
];
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
float3
tix
;
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
cSim
.
pPmeAtomGridIndex
[
i
]
=
make_float2
(
i
,
itmp
.
x
*
cSim
.
pmeGridSize
.
y
*
cSim
.
pmeGridSize
.
z
+
itmp
.
y
*
cSim
.
pmeGridSize
.
z
+
itmp
.
z
);
float4
posq
=
cSim
.
pPosq
[
i
];
float3
t
=
make_float3
((
posq
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
posq
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
posq
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
cSim
.
pmeGridSize
.
x
,
((
int
)
t
.
y
)
%
cSim
.
pmeGridSize
.
y
,
((
int
)
t
.
z
)
%
cSim
.
pmeGridSize
.
z
);
cSim
.
pPmeAtomGridIndex
[
i
]
=
make_float2
(
i
,
gridIndex
.
x
*
cSim
.
pmeGridSize
.
y
*
cSim
.
pmeGridSize
.
z
+
gridIndex
.
y
*
cSim
.
pmeGridSize
.
z
+
gridIndex
.
z
);
}
}
...
...
@@ -173,7 +167,11 @@ __global__ void kUpdateBsplines_kernel()
ddata
[
j
]
=
make_float4
(
0.0
f
);
}
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
float4
posq
=
cSim
.
pPosq
[
tid
];
float3
t
=
make_float3
((
posq
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
posq
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
posq
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
float4
dr
=
make_float4
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
,
0.0
f
);
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
1
]
=
dr
;
...
...
@@ -295,7 +293,12 @@ __global__ void kGridInterpolateForce_kernel()
{
float3
force
=
make_float3
(
0.0
f
,
0.0
f
,
0.0
f
);
float4
posq
=
cSim
.
pPosq
[
atom
];
int4
gridIndex
=
cSim
.
pPmeParticleIndex
[
atom
];
float3
t
=
make_float3
((
posq
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
posq
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
posq
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
cSim
.
pmeGridSize
.
x
,
((
int
)
t
.
y
)
%
cSim
.
pmeGridSize
.
y
,
((
int
)
t
.
z
)
%
cSim
.
pmeGridSize
.
z
);
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xindex
=
(
gridIndex
.
x
+
ix
)
%
cSim
.
pmeGridSize
.
x
;
...
...
@@ -312,10 +315,10 @@ __global__ void kGridInterpolateForce_kernel()
float
tz
=
cSim
.
pPmeBsplineTheta
[
atom
+
iz
*
cSim
.
atoms
].
z
;
float
dtz
=
cSim
.
pPmeBsplineDtheta
[
atom
+
iz
*
cSim
.
atoms
].
z
;
int
index
=
xindex
*
cSim
.
pmeGridSize
.
y
*
cSim
.
pmeGridSize
.
z
+
yindex
*
cSim
.
pmeGridSize
.
z
+
zindex
;
float
gridvalue
=
cSim
.
pPmeGrid
[
index
].
x
;
force
.
x
+=
dtx
*
ty
*
tz
*
gridvalue
;
force
.
y
+=
tx
*
dty
*
tz
*
gridvalue
;
force
.
z
+=
tx
*
ty
*
dtz
*
gridvalue
;
float
gridvalue
=
cSim
.
pPmeGrid
[
index
].
x
;
force
.
x
+=
dtx
*
ty
*
tz
*
gridvalue
;
force
.
y
+=
tx
*
dty
*
tz
*
gridvalue
;
force
.
z
+=
tx
*
ty
*
dtz
*
gridvalue
;
}
}
}
...
...
@@ -347,6 +350,6 @@ void kCalculatePME(gpuContext gpu)
kReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kGridInterpolateForce_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kGridInterpolateForce_kernel
<<<
2
*
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kGridInterpolateForce"
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment