Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dbcc494f
Commit
dbcc494f
authored
Jan 24, 2017
by
Peter Eastman
Browse files
Cleanup to CUDA dispersion PME code
parent
a9f65649
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
75 additions
and
170 deletions
+75
-170
openmmapi/include/openmm/internal/NonbondedForceImpl.h
openmmapi/include/openmm/internal/NonbondedForceImpl.h
+1
-1
openmmapi/src/NonbondedForceImpl.cpp
openmmapi/src/NonbondedForceImpl.cpp
+3
-4
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+6
-13
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+54
-144
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+2
-1
platforms/cuda/src/kernels/ljpme.cu
platforms/cuda/src/kernels/ljpme.cu
+9
-7
No files found.
openmmapi/include/openmm/internal/NonbondedForceImpl.h
View file @
dbcc494f
...
@@ -75,7 +75,7 @@ public:
...
@@ -75,7 +75,7 @@ public:
* This is a utility routine that calculates the values to use for alpha and grid size when using
* This is a utility routine that calculates the values to use for alpha and grid size when using
* Particle Mesh Ewald.
* Particle Mesh Ewald.
*/
*/
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
);
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
);
/**
/**
* Compute the coefficient which, when divided by the periodic box volume, gives the
* Compute the coefficient which, when divided by the periodic box volume, gives the
* long range dispersion correction to the energy.
* long range dispersion correction to the energy.
...
...
openmmapi/src/NonbondedForceImpl.cpp
View file @
dbcc494f
...
@@ -151,12 +151,11 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
...
@@ -151,12 +151,11 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
kmaxz
++
;
kmaxz
++
;
}
}
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
)
{
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
)
{
if
(
LJ
)
{
if
(
lj
)
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
else
{
else
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
if
(
alpha
==
0.0
)
{
if
(
alpha
==
0.0
)
{
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
...
...
platforms/cuda/include/CudaKernels.h
View file @
dbcc494f
...
@@ -598,10 +598,9 @@ private:
...
@@ -598,10 +598,9 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
C6s
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
directDispersionPmeGrid
(
NULL
),
reciprocalDispersionPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeAtomDispersionGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeEnergyBuffer
(
NULL
),
dispersionPmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
),
dispersionPmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -666,28 +665,22 @@ private:
...
@@ -666,28 +665,22 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
bool
hasInitializedFFT
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
C6s
;
CudaArray
*
exceptionParams
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
*
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
*
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
directDispersionPmeGrid
;
CudaArray
*
reciprocalDispersionPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomDispersionGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
dispersionPmeEnergyBuffer
;
CudaSort
*
sort
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
PmeIO
*
dispersionPmeio
;
CUstream
pmeStream
;
CUstream
pmeStream
,
dispersionPmeStream
;
CUevent
pmeSyncEvent
;
CUevent
pmeSyncEvent
,
dispersionPmeSyncEvent
;
CudaFFT3D
*
fft
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
cufftHandle
fftBackward
;
...
@@ -710,7 +703,7 @@ private:
...
@@ -710,7 +703,7 @@ private:
CUfunction
pmeInterpolateDispersionForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
dbcc494f
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
dbcc494f
...
@@ -30,7 +30,8 @@
...
@@ -30,7 +30,8 @@
const
real
dar6
=
dar4
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
c6
=
C6s1
*
C6s2
;
const
real2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
...
...
platforms/cuda/src/kernels/ljpme.cu
View file @
dbcc494f
...
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__
...
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -63,7 +63,9 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
...
@@ -63,7 +63,9 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
data
[
0
]
=
scale
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
data
[
0
]
=
scale
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
// Spread the charge from this atom onto each grid point.
// Spread the charge from this atom onto each grid point.
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
c6
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
...
@@ -81,8 +83,7 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
...
@@ -81,8 +83,7 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
// We need to grab the C6 coefficient from the array
real
add
=
c6
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
C6s
[
atom
]
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
@@ -241,7 +242,7 @@ gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __r
...
@@ -241,7 +242,7 @@ gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __r
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -287,7 +288,7 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
...
@@ -287,7 +288,7 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
// Compute the force on this atom.
// Compute the force on this atom.
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
...
@@ -313,7 +314,8 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
...
@@ -313,7 +314,8 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
}
}
}
}
}
}
real
q
=
C6s
[
atom
];
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
real
q
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment