Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a4b2a13b
Commit
a4b2a13b
authored
Aug 27, 2009
by
Peter Eastman
Browse files
Beginnings of the CUDA implementation of PME
parent
e009228a
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
311 additions
and
16 deletions
+311
-16
platforms/cuda/sharedTarget/CMakeLists.txt
platforms/cuda/sharedTarget/CMakeLists.txt
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+18
-12
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+2
-0
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+12
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+45
-0
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+10
-0
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+18
-0
platforms/cuda/src/kernels/kCalculateLocalForces.cu
platforms/cuda/src/kernels/kCalculateLocalForces.cu
+1
-1
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+203
-0
platforms/cuda/staticTarget/CMakeLists.txt
platforms/cuda/staticTarget/CMakeLists.txt
+1
-1
No files found.
platforms/cuda/sharedTarget/CMakeLists.txt
View file @
a4b2a13b
...
@@ -17,7 +17,7 @@ CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/jama/include)
...
@@ -17,7 +17,7 @@ CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/jama/include)
CUDA_ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
CUDA_ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
debug
${
OPENMM_LIBRARY_NAME
}
_d optimized
${
OPENMM_LIBRARY_NAME
}
cudpp cutil
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
debug
${
OPENMM_LIBRARY_NAME
}
_d optimized
${
OPENMM_LIBRARY_NAME
}
cudpp cutil
${
CUFFT_TARGET_LINK
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cuda/src/CudaKernels.cpp
View file @
a4b2a13b
...
@@ -298,7 +298,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -298,7 +298,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
gpuSetPeriodicBoxSize
(
gpu
,
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]);
gpuSetPeriodicBoxSize
(
gpu
,
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]);
method
=
PERIODIC
;
method
=
PERIODIC
;
}
}
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
||
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
force
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
force
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
gpuSetPeriodicBoxSize
(
gpu
,
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]);
gpuSetPeriodicBoxSize
(
gpu
,
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]);
...
@@ -308,17 +308,23 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -308,17 +308,23 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
double
my
=
boxVectors
[
1
][
1
]
/
force
.
getCutoffDistance
();
double
my
=
boxVectors
[
1
][
1
]
/
force
.
getCutoffDistance
();
double
mz
=
boxVectors
[
2
][
2
]
/
force
.
getCutoffDistance
();
double
mz
=
boxVectors
[
2
][
2
]
/
force
.
getCutoffDistance
();
double
pi
=
3.1415926535897932385
;
double
pi
=
3.1415926535897932385
;
int
kmaxx
=
(
int
)
std
::
ceil
(
-
(
mx
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
int
kmaxy
=
(
int
)
std
::
ceil
(
-
(
my
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
int
kmaxx
=
(
int
)
std
::
ceil
(
-
(
mx
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
int
kmaxz
=
(
int
)
std
::
ceil
(
-
(
mz
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
int
kmaxy
=
(
int
)
std
::
ceil
(
-
(
my
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
if
(
kmaxx
%
2
==
0
)
int
kmaxz
=
(
int
)
std
::
ceil
(
-
(
mz
/
pi
)
*
std
::
log
(
ewaldErrorTol
));
kmaxx
++
;
if
(
kmaxx
%
2
==
0
)
if
(
kmaxy
%
2
==
0
)
kmaxx
++
;
kmaxy
++
;
if
(
kmaxy
%
2
==
0
)
if
(
kmaxz
%
2
==
0
)
kmaxy
++
;
kmaxz
++
;
if
(
kmaxz
%
2
==
0
)
gpuSetEwaldParameters
(
gpu
,
(
float
)
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
kmaxz
++
;
method
=
EWALD
;
gpuSetEwaldParameters
(
gpu
,
(
float
)
alpha
,
kmaxx
,
kmaxy
,
kmaxz
);
method
=
EWALD
;
}
else
{
gpuSetPMEParameters
(
gpu
,
(
float
)
alpha
);
method
=
PARTICLE_MESH_EWALD
;
}
}
}
data
.
nonbondedMethod
=
method
;
data
.
nonbondedMethod
=
method
;
gpuSetCoulombParameters
(
gpu
,
138.935485
f
,
particle
,
c6
,
c12
,
q
,
symbol
,
exclusionList
,
method
);
gpuSetCoulombParameters
(
gpu
,
138.935485
f
,
particle
,
c6
,
c12
,
q
,
symbol
,
exclusionList
,
method
);
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
a4b2a13b
...
@@ -77,6 +77,8 @@ extern void SetCalculateObcGbsaForces2Sim(gpuContext gpu);
...
@@ -77,6 +77,8 @@ extern void SetCalculateObcGbsaForces2Sim(gpuContext gpu);
extern
void
GetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
SetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
SetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
GetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
GetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
SetCalculatePMESim
(
gpuContext
gpu
);
extern
void
GetCalculatePMESim
(
gpuContext
gpu
);
extern
void
SetForcesSim
(
gpuContext
gpu
);
extern
void
SetForcesSim
(
gpuContext
gpu
);
extern
void
GetForcesSim
(
gpuContext
gpu
);
extern
void
GetForcesSim
(
gpuContext
gpu
);
extern
void
SetShakeHSim
(
gpuContext
gpu
);
extern
void
SetShakeHSim
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
a4b2a13b
...
@@ -35,6 +35,7 @@
...
@@ -35,6 +35,7 @@
#include <string>
#include <string>
#include <cuda.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <builtin_types.h>
#include <builtin_types.h>
#include <vector_functions.h>
#include <vector_functions.h>
...
@@ -236,12 +237,15 @@ static const unsigned int MAX_TABULATED_FUNCTIONS = 4;
...
@@ -236,12 +237,15 @@ static const unsigned int MAX_TABULATED_FUNCTIONS = 4;
static
const
float
PI
=
3.14159265358979323846
f
;
static
const
float
PI
=
3.14159265358979323846
f
;
static
const
int
PME_ORDER
=
4
;
enum
CudaNonbondedMethod
enum
CudaNonbondedMethod
{
{
NO_CUTOFF
,
NO_CUTOFF
,
CUTOFF
,
CUTOFF
,
PERIODIC
,
PERIODIC
,
EWALD
EWALD
,
PARTICLE_MESH_EWALD
};
};
enum
ExpressionOp
{
enum
ExpressionOp
{
...
@@ -354,6 +358,13 @@ struct cudaGmxSimulation {
...
@@ -354,6 +358,13 @@ struct cudaGmxSimulation {
float4
*
pTabulatedFunctionCoefficients
[
MAX_TABULATED_FUNCTIONS
];
// The spline coefficients for each tabulated function
float4
*
pTabulatedFunctionCoefficients
[
MAX_TABULATED_FUNCTIONS
];
// The spline coefficients for each tabulated function
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float4
*
pTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
int3
pmeGridSize
;
// The dimensions of the grid for particle mesh Ewald
cufftComplex
*
pPmeGrid
;
// Grid points for particle mesh Ewald
float
*
pPmeBsplineModuli
[
3
];
float4
*
pPmeBsplineTheta
;
float4
*
pPmeBsplineDtheta
;
int4
*
pPmeParticleIndex
;
// The grid indices for each atom
float4
*
pPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
unsigned
int
bonds
;
// Number of bonds
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
a4b2a13b
...
@@ -757,6 +757,31 @@ void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, in
...
@@ -757,6 +757,31 @@ void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, in
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
}
}
extern
"C"
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
)
{
gpu
->
sim
.
alphaEwald
=
alpha
;
int3
gridSize
=
make_int3
(
16
,
16
,
16
);
gpu
->
sim
.
pmeGridSize
=
gridSize
;
cufftPlan3d
(
&
gpu
->
fftplan
,
gridSize
.
x
,
gridSize
.
y
,
gridSize
.
z
,
CUFFT_C2C
);
gpu
->
psPmeGrid
=
new
CUDAStream
<
cufftComplex
>
(
gridSize
.
x
*
gridSize
.
y
*
gridSize
.
z
,
1
,
"PmeGrid"
);
gpu
->
sim
.
pPmeGrid
=
gpu
->
psPmeGrid
->
_pDevData
;
gpu
->
psPmeBsplineModuli
[
0
]
=
new
CUDAStream
<
float
>
(
gridSize
.
x
,
1
,
"PmeBsplineModuli0"
);
gpu
->
sim
.
pPmeBsplineModuli
[
0
]
=
gpu
->
psPmeBsplineModuli
[
0
]
->
_pDevData
;
gpu
->
psPmeBsplineModuli
[
1
]
=
new
CUDAStream
<
float
>
(
gridSize
.
y
,
1
,
"PmeBsplineModuli1"
);
gpu
->
sim
.
pPmeBsplineModuli
[
1
]
=
gpu
->
psPmeBsplineModuli
[
1
]
->
_pDevData
;
gpu
->
psPmeBsplineModuli
[
2
]
=
new
CUDAStream
<
float
>
(
gridSize
.
z
,
1
,
"PmeBsplineModuli2"
);
gpu
->
sim
.
pPmeBsplineModuli
[
2
]
=
gpu
->
psPmeBsplineModuli
[
2
]
->
_pDevData
;
gpu
->
psPmeBsplineTheta
=
new
CUDAStream
<
float4
>
(
PME_ORDER
*
gpu
->
natoms
,
1
,
"PmeBsplineTheta"
);
gpu
->
sim
.
pPmeBsplineTheta
=
gpu
->
psPmeBsplineTheta
->
_pDevData
;
gpu
->
psPmeBsplineDtheta
=
new
CUDAStream
<
float4
>
(
PME_ORDER
*
gpu
->
natoms
,
1
,
"PmeBsplineDtheta"
);
gpu
->
sim
.
pPmeBsplineDtheta
=
gpu
->
psPmeBsplineDtheta
->
_pDevData
;
gpu
->
psPmeParticleIndex
=
new
CUDAStream
<
int4
>
(
gpu
->
natoms
,
1
,
"PmeParticleIndex"
);
gpu
->
sim
.
pPmeParticleIndex
=
gpu
->
psPmeParticleIndex
->
_pDevData
;
gpu
->
psPmeParticleFraction
=
new
CUDAStream
<
float4
>
(
gpu
->
natoms
,
1
,
"PmeParticleFraction"
);
gpu
->
sim
.
pPmeParticleFraction
=
gpu
->
psPmeParticleFraction
->
_pDevData
;
}
extern
"C"
extern
"C"
void
gpuSetPeriodicBoxSize
(
gpuContext
gpu
,
float
xsize
,
float
ysize
,
float
zsize
)
void
gpuSetPeriodicBoxSize
(
gpuContext
gpu
,
float
xsize
,
float
ysize
,
float
zsize
)
{
{
...
@@ -1589,6 +1614,14 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
...
@@ -1589,6 +1614,14 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
psCustomExceptionID
=
NULL
;
gpu
->
psCustomExceptionID
=
NULL
;
gpu
->
psCustomExceptionParams
=
NULL
;
gpu
->
psCustomExceptionParams
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psPmeGrid
=
NULL
;
gpu
->
psPmeBsplineModuli
[
0
]
=
NULL
;
gpu
->
psPmeBsplineModuli
[
1
]
=
NULL
;
gpu
->
psPmeBsplineModuli
[
2
]
=
NULL
;
gpu
->
psPmeBsplineTheta
=
NULL
;
gpu
->
psPmeBsplineDtheta
=
NULL
;
gpu
->
psPmeParticleIndex
=
NULL
;
gpu
->
psPmeParticleFraction
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psSettleID
=
NULL
;
gpu
->
psSettleID
=
NULL
;
...
@@ -1747,6 +1780,17 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1747,6 +1780,17 @@ void gpuShutDown(gpuContext gpu)
}
}
if
(
gpu
->
psEwaldCosSinSum
!=
NULL
)
if
(
gpu
->
psEwaldCosSinSum
!=
NULL
)
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psEwaldCosSinSum
;
if
(
gpu
->
psPmeGrid
!=
NULL
)
{
delete
gpu
->
psPmeGrid
;
delete
gpu
->
psPmeBsplineModuli
[
0
];
delete
gpu
->
psPmeBsplineModuli
[
1
];
delete
gpu
->
psPmeBsplineModuli
[
2
];
delete
gpu
->
psPmeBsplineTheta
;
delete
gpu
->
psPmeBsplineDtheta
;
delete
gpu
->
psPmeParticleIndex
;
delete
gpu
->
psPmeParticleFraction
;
cufftDestroy
(
gpu
->
fftplan
);
}
delete
gpu
->
psObcData
;
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornForce
;
...
@@ -2102,6 +2146,7 @@ int gpuSetConstants(gpuContext gpu)
...
@@ -2102,6 +2146,7 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateObcGbsaBornSumSim
(
gpu
);
SetCalculateObcGbsaBornSumSim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetCalculatePMESim
(
gpu
);
SetForcesSim
(
gpu
);
SetForcesSim
(
gpu
);
SetShakeHSim
(
gpu
);
SetShakeHSim
(
gpu
);
SetLangevinUpdateSim
(
gpu
);
SetLangevinUpdateSim
(
gpu
);
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
a4b2a13b
...
@@ -91,6 +91,7 @@ struct _gpuContext {
...
@@ -91,6 +91,7 @@ struct _gpuContext {
unsigned
long
seed
;
unsigned
long
seed
;
SM_VERSION
sm_version
;
SM_VERSION
sm_version
;
compactionPlan
compactPlan
;
compactionPlan
compactPlan
;
cufftHandle
fftplan
;
CUDAStream
<
float4
>*
psPosq4
;
CUDAStream
<
float4
>*
psPosq4
;
CUDAStream
<
float4
>*
psPosqP4
;
CUDAStream
<
float4
>*
psPosqP4
;
CUDAStream
<
float4
>*
psOldPosq4
;
CUDAStream
<
float4
>*
psOldPosq4
;
...
@@ -105,6 +106,12 @@ struct _gpuContext {
...
@@ -105,6 +106,12 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psCustomExceptionParams
;
// Parameters for custom nonbonded exceptions
CUDAStream
<
float4
>*
psCustomExceptionParams
;
// Parameters for custom nonbonded exceptions
CUDAStream
<
float4
>*
psTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
CUDAStream
<
float4
>*
psTabulatedFunctionParams
;
// The min, max, and spacing for each tabulated function
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
cufftComplex
>*
psPmeGrid
;
// Grid points for particle mesh Ewald
CUDAStream
<
float
>*
psPmeBsplineModuli
[
3
];
CUDAStream
<
float4
>*
psPmeBsplineTheta
;
CUDAStream
<
float4
>*
psPmeBsplineDtheta
;
CUDAStream
<
int4
>*
psPmeParticleIndex
;
// The grid indices for each atom
CUDAStream
<
float4
>*
psPmeParticleFraction
;
// Fractional offset in the grid for each atom in all three dimensions.
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psBornForce
;
CUDAStream
<
float
>*
psBornForce
;
...
@@ -206,6 +213,9 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vect
...
@@ -206,6 +213,9 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vect
extern
"C"
extern
"C"
void
gpuSetEwaldParameters
(
gpuContext
gpu
,
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
);
void
gpuSetEwaldParameters
(
gpuContext
gpu
,
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
);
extern
"C"
void
gpuSetPMEParameters
(
gpuContext
gpu
,
float
alpha
);
extern
"C"
extern
"C"
void
gpuSetPeriodicBoxSize
(
gpuContext
gpu
,
float
xsize
,
float
ysize
,
float
zsize
);
void
gpuSetPeriodicBoxSize
(
gpuContext
gpu
,
float
xsize
,
float
ysize
,
float
zsize
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
a4b2a13b
...
@@ -120,6 +120,7 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
...
@@ -120,6 +120,7 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
// Reciprocal Space Ewald summation is in a separate kernel
// Reciprocal Space Ewald summation is in a separate kernel
#include "kCalculateCDLJEwaldFastReciprocal.h"
#include "kCalculateCDLJEwaldFastReciprocal.h"
void
kCalculatePME
(
gpuContext
gpu
);
void
kCalculateCDLJForces
(
gpuContext
gpu
)
void
kCalculateCDLJForces
(
gpuContext
gpu
)
{
{
...
@@ -187,5 +188,22 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -187,5 +188,22 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
break
;
case
PARTICLE_MESH_EWALD
:
kFindBlockBoundsEwaldDirect_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsEwaldDirect"
);
kFindBlocksWithInteractionsEwaldDirect_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsEwaldDirect"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksEwaldDirect_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJEwaldDirectByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
kCalculatePME
(
gpu
);
}
}
}
}
platforms/cuda/src/kernels/kCalculateLocalForces.cu
View file @
a4b2a13b
...
@@ -448,7 +448,7 @@ __global__ void kCalculateLocalForces_kernel()
...
@@ -448,7 +448,7 @@ __global__ void kCalculateLocalForces_kernel()
pos
+=
blockDim
.
x
*
gridDim
.
x
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
if
(
cSim
.
nonbondedMethod
==
NO_CUTOFF
||
cSim
.
nonbondedMethod
==
EWALD
)
if
(
cSim
.
nonbondedMethod
==
NO_CUTOFF
||
cSim
.
nonbondedMethod
==
EWALD
||
cSim
.
nonbondedMethod
==
PARTICLE_MESH_EWALD
)
{
{
while
(
pos
<
cSim
.
LJ14_offset
)
while
(
pos
<
cSim
.
LJ14_offset
)
{
{
...
...
platforms/cuda/src/kernels/kCalculatePME.cu
0 → 100644
View file @
a4b2a13b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Erik Lindahl, Rossen Apostolov, Szilard Pall, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include <cuda.h>
using
namespace
std
;
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCalculatePMESim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCalculatePMESim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
inline
__host__
__device__
int
fast_mod
(
int
a
,
int
b
)
{
return
(
b
&
(
b
-
1
))
?
a
%
b
:
a
&
(
b
-
1
);
}
inline
__host__
__device__
float4
make_float4
(
float
s
)
{
return
make_float4
(
s
,
s
,
s
,
s
);
}
inline
__host__
__device__
float4
operator
-
(
float4
&
a
)
{
return
make_float4
(
-
a
.
x
,
-
a
.
y
,
-
a
.
z
,
-
a
.
w
);
}
inline
__host__
__device__
float4
operator
-
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
inline
__host__
__device__
float4
operator
+
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
inline
__host__
__device__
float4
operator
+
(
float4
a
,
float
b
)
{
return
make_float4
(
a
.
x
+
b
,
a
.
y
+
b
,
a
.
z
+
b
,
a
.
w
+
b
);
}
inline
__host__
__device__
float4
operator
+
(
float
a
,
float4
b
)
{
return
make_float4
(
a
+
b
.
x
,
a
+
b
.
y
,
a
+
b
.
z
,
a
+
b
.
w
);
}
inline
__host__
__device__
float4
operator
*
(
float
s
,
float4
a
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
inline
__host__
__device__
float4
operator
*
(
float4
a
,
float4
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
+
b
.
w
);
}
inline
__host__
__device__
float4
make_float4
(
int3
a
)
{
return
make_float4
(
a
.
x
,
a
.
y
,
a
.
z
,
0
);
}
__global__
void
kUpdateGridIndexAndFraction
()
{
unsigned
int
tnb
=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
{
float4
ftmp
=
cSim
.
pPosq
[
i
];
__syncthreads
();
float3
t
=
make_float3
((
ftmp
.
x
/
cSim
.
periodicBoxSizeX
+
1.0
f
)
*
cSim
.
pmeGridSize
.
x
,
(
ftmp
.
y
/
cSim
.
periodicBoxSizeY
+
1.0
f
)
*
cSim
.
pmeGridSize
.
y
,
(
ftmp
.
z
/
cSim
.
periodicBoxSizeZ
+
1.0
f
)
*
cSim
.
pmeGridSize
.
z
);
float3
tix
;
ftmp
.
x
=
modff
(
t
.
x
,
&
tix
.
x
);
ftmp
.
y
=
modff
(
t
.
y
,
&
tix
.
y
);
ftmp
.
z
=
modff
(
t
.
z
,
&
tix
.
z
);
cSim
.
pPmeParticleFraction
[
i
]
=
ftmp
;
/* avoid costly % operations if possible that is if dc_ngrid.* is pow. of 2 */
int4
itmp
=
make_int4
(
fast_mod
(
__float2int_rd
(
tix
.
x
),
cSim
.
pmeGridSize
.
x
),
fast_mod
(
__float2int_rd
(
tix
.
y
),
cSim
.
pmeGridSize
.
y
),
fast_mod
(
__float2int_rd
(
tix
.
z
),
cSim
.
pmeGridSize
.
z
),
0
);
cSim
.
pPmeParticleIndex
[
i
]
=
itmp
;
__syncthreads
();
}
}
__global__
void
kUpdateBsplines
()
{
unsigned
int
tnb
=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
extern
__shared__
float4
bsplines_cache
[];
/*size = 2 * block_size * pme_order*/
const
float4
div_o
=
make_float4
(
1.0
f
/
(
PME_ORDER
-
1
));
for
(
int
i
=
tid
;
i
<
cSim
.
atoms
;
i
+=
tnb
)
{
float4
*
data
=
&
bsplines_cache
[
threadIdx
.
x
*
PME_ORDER
];
float4
*
ddata
=
&
bsplines_cache
[
threadIdx
.
x
*
PME_ORDER
+
blockDim
.
x
*
PME_ORDER
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
data
[
j
]
=
make_float4
(
0.0
f
);
ddata
[
j
]
=
make_float4
(
0.0
f
);
}
/* load data */
float4
dr
=
cSim
.
pPmeParticleFraction
[
i
];
__syncthreads
();
data
[
PME_ORDER
-
1
]
=
make_float4
(
0.0
f
);
data
[
1
]
=
dr
;
data
[
0
]
=
make_float4
(
1.0
f
)
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
float
div
=
1.0
f
/
((
float
)
j
-
1.0
f
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
{
data
[
j
-
k
-
1
]
=
div
*
(
(
dr
+
float
(
k
))
*
data
[
j
-
k
-
2
]
+
(
-
dr
+
((
float
)(
j
-
k
)))
*
data
[
j
-
k
-
1
]);
}
data
[
0
]
=
div
*
(
-
dr
+
1
)
*
data
[
0
];
}
ddata
[
0
]
=
-
data
[
0
];
for
(
int
j
=
1
;
j
<
PME_ORDER
;
j
++
)
{
ddata
[
j
]
=
data
[
j
-
1
]
-
data
[
j
];
}
data
[
PME_ORDER
-
1
]
=
div_o
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
{
data
[
PME_ORDER
-
j
-
1
]
=
div_o
*
(
(
dr
+
(
float
)
j
)
*
data
[
PME_ORDER
-
j
-
2
]
+
(
-
dr
+
((
float
)(
PME_ORDER
-
j
)))
*
data
[
PME_ORDER
-
j
-
1
]
);
}
data
[
0
]
=
div_o
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
__syncthreads
();
/* write back */
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
cSim
.
pPmeBsplineTheta
[
i
*
PME_ORDER
+
j
]
=
data
[
j
];
cSim
.
pPmeBsplineDtheta
[
i
*
PME_ORDER
+
j
]
=
ddata
[
j
];
}
__syncthreads
();
}
}
void
kCalculatePME
(
gpuContext
gpu
)
{
// printf("kCalculatePME\n");
kUpdateGridIndexAndFraction
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kUpdateGridIndexAndFraction"
);
kUpdateBsplines
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
2
*
gpu
->
sim
.
update_threads_per_block
*
PME_ORDER
*
sizeof
(
float4
)
>>>
();
LAUNCHERROR
(
"kUpdateBsplines"
);
}
platforms/cuda/staticTarget/CMakeLists.txt
View file @
a4b2a13b
...
@@ -25,6 +25,6 @@ SET(CUDA_STATIC_COMPILE_FLAG "-DOPENMM_BUILDING_STATIC_LIBRARY -DOPENMM_USE_STAT
...
@@ -25,6 +25,6 @@ SET(CUDA_STATIC_COMPILE_FLAG "-DOPENMM_BUILDING_STATIC_LIBRARY -DOPENMM_USE_STAT
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES COMPILE_FLAGS
${
CUDA_STATIC_COMPILE_FLAG
}
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES COMPILE_FLAGS
${
CUDA_STATIC_COMPILE_FLAG
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
optimized
${
OPENMM_LIBRARY_NAME
}
_static cudpp cutil
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
optimized
${
OPENMM_LIBRARY_NAME
}
_static cudpp cutil
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
debug
${
OPENMM_LIBRARY_NAME
}
_static_d optimized
${
OPENMM_LIBRARY_NAME
}
_static cudpp cutil
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
debug
${
OPENMM_LIBRARY_NAME
}
_static_d optimized
${
OPENMM_LIBRARY_NAME
}
_static cudpp cutil
${
CUFFT_TARGET_LINK
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment