Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
235a88e5
"vscode:/vscode.git/clone" did not exist on "45c231325cc847484753d5d0a833de8d7a1a4107"
Commit
235a88e5
authored
Sep 11, 2012
by
Peter Eastman
Browse files
Minor cleanup to PME
parent
b27a0bd6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
27 deletions
+15
-27
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+9
-6
platforms/cuda2/src/CudaKernels.h
platforms/cuda2/src/CudaKernels.h
+0
-8
platforms/cuda2/src/kernels/pme.cu
platforms/cuda2/src/kernels/pme.cu
+6
-13
No files found.
platforms/cuda2/src/CudaKernels.cpp
View file @
235a88e5
...
...
@@ -1490,6 +1490,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
...
...
@@ -1601,7 +1602,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
void
*
forcesArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cosSinSums
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
ewaldForcesKernel
,
forcesArgs
,
cu
.
getNumAtoms
());
}
if
(
convolvedPmeGrid
!=
NULL
&&
originalPmeGrid
!=
NULL
&&
reciproc
alPmeGrid
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
if
(
origin
alPmeGrid
!=
NULL
&&
cu
.
getContextIndex
()
==
0
&&
includeReciprocal
)
{
void
*
bsplinesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeBsplineTheta
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
int
bsplinesSharedSize
=
cu
.
ThreadBlockSize
*
PmeOrder
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
));
...
...
@@ -1617,7 +1618,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu
.
executeKernel
(
pmeSpreadChargeKernel
,
spreadArgs
,
cu
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
void
*
finishSpreadArgs
[]
=
{
&
originalPmeGrid
->
getDevicePointer
()};
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
)
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
)
{
void
*
finishSpreadArgs
[]
=
{
&
originalPmeGrid
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
originalPmeGrid
->
getSize
());
}
...
...
@@ -1633,11 +1634,13 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2D
(
fftBackward
,
(
double2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
double
*
)
convolvedPmeGrid
->
getDevicePointer
());
else
cufftExecC2R
(
fftBackward
,
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
float
*
)
convolvedPmeGrid
->
getDevicePointer
());
void
*
computeEnergyArgs
[]
=
{
&
originalPmeGrid
->
getDevicePointer
(),
&
convolvedPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
()
};
cu
.
executeKernel
(
pmeEvalEnergyKernel
,
computeEnergyArgs
,
cu
.
getNumAtoms
());
cufftExecC2R
(
fftBackward
,
(
float2
*
)
reciprocalPmeGrid
->
getDevicePointer
(),
(
float
*
)
convolvedPmeGrid
->
getDevicePointer
());
if
(
includeEnergy
)
{
void
*
computeEnergyArgs
[]
=
{
&
originalPmeGrid
->
getDevicePointer
(),
&
convolvedPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
()
};
cu
.
executeKernel
(
pmeEvalEnergyKernel
,
computeEnergyArgs
,
cu
.
getNumAtoms
());
}
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
convolvedPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
());
...
...
platforms/cuda2/src/CudaKernels.h
View file @
235a88e5
...
...
@@ -595,13 +595,9 @@ private:
CudaArray
*
sigmaEpsilon
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
//TODO: separate into realpmeGrid, complex pmegrid, and resultpmeGrid
CudaArray
*
originalPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
convolvedPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
...
...
@@ -610,10 +606,8 @@ private:
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
...
...
@@ -622,9 +616,7 @@ private:
CUfunction
pmeUpdateBsplinesKernel
;
CUfunction
pmeSpreadChargeKernel
;
CUfunction
pmeFinishSpreadChargeKernel
;
/* TESTING ENERGY KERNEL */
CUfunction
pmeEvalEnergyKernel
;
CUfunction
pmeConvolutionKernel
;
CUfunction
pmeInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
...
...
platforms/cuda2/src/kernels/pme.cu
View file @
235a88e5
...
...
@@ -53,6 +53,7 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
}
// Fill in values beyond the last atom.
if
(
blockIdx
.
x
==
gridDim
.
x
-
1
&&
threadIdx
.
x
==
blockDim
.
x
-
1
)
{
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
for
(
int
j
=
last
+
1
;
j
<=
gridSize
;
++
j
)
...
...
@@ -139,13 +140,11 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
//R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
int
kx
=
index
/
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
));
int
remainder
=
index
-
kx
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
...
...
@@ -180,14 +179,11 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
}
extern
"C"
__global__
void
gridEvaluateEnergy
(
const
real
*
__restrict__
originalGrid
,
const
real
*
__restrict
convolvedGrid
,
real
*
__restrict__
energyBuffer
)
{
void
gridEvaluateEnergy
(
const
real
*
__restrict__
originalGrid
,
const
real
*
__restrict__
convolvedGrid
,
real
*
__restrict__
energyBuffer
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
real
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
energy
+=
originalGrid
[
index
]
*
convolvedGrid
[
index
];
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
*
energy
;
}
...
...
@@ -213,16 +209,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real3
dr
=
make_real3
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
);
data
[
PME_ORDER
-
1
]
=
make_real3
(
0
);
data
[
1
]
=
dr
;
data
[
0
]
=
make_real3
(
1
)
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
make_real3
(
k
))
*
data
[
j
-
k
-
2
]
+
(
make_real3
(
j
-
k
)
-
dr
)
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
...
...
@@ -233,7 +228,6 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
ddata
[
j
]
=
data
[
j
-
1
]
-
data
[
j
];
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
make_real3
(
j
))
*
data
[
PME_ORDER
-
j
-
2
]
+
(
make_real3
(
PME_ORDER
-
j
)
-
dr
)
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
...
...
@@ -266,7 +260,6 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0xFFFFFFFF
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0xFFFFFFFF
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0xFFFFFFFF
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment