Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
734d5d67
"wrappers/python/vscode:/vscode.git/clone" did not exist on "234425d1741fce83813052f8fab2cd092129cc67"
Commit
734d5d67
authored
Mar 22, 2010
by
Scott Le Grand
Browse files
No commit message
No commit message
parent
a409f0e8
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
274 additions
and
44 deletions
+274
-44
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+3
-3
platforms/cuda/src/kernels/kBrownianUpdate.cu
platforms/cuda/src/kernels/kBrownianUpdate.cu
+18
-2
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+9
-1
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+9
-1
platforms/cuda/src/kernels/kCalculateCustomTorsionForces.cu
platforms/cuda/src/kernels/kCalculateCustomTorsionForces.cu
+9
-1
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
+9
-1
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
+9
-1
platforms/cuda/src/kernels/kCalculateLocalForces.cu
platforms/cuda/src/kernels/kCalculateLocalForces.cu
+9
-1
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+6
-2
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+9
-1
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+9
-1
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+45
-7
platforms/cuda/src/kernels/kLangevinUpdate.cu
platforms/cuda/src/kernels/kLangevinUpdate.cu
+9
-1
platforms/cuda/src/kernels/kLangevinUpdate.h
platforms/cuda/src/kernels/kLangevinUpdate.h
+21
-5
platforms/cuda/src/kernels/kRandom.cu
platforms/cuda/src/kernels/kRandom.cu
+9
-1
platforms/cuda/src/kernels/kSettle.cu
platforms/cuda/src/kernels/kSettle.cu
+18
-2
platforms/cuda/src/kernels/kShakeH.cu
platforms/cuda/src/kernels/kShakeH.cu
+41
-5
platforms/cuda/src/kernels/kVerletUpdate.cu
platforms/cuda/src/kernels/kVerletUpdate.cu
+9
-1
platforms/cuda/src/kernels/kVerletUpdate.h
platforms/cuda/src/kernels/kVerletUpdate.h
+23
-7
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
734d5d67
...
@@ -222,12 +222,12 @@ static const int GF1XX_BLOCKS_PER_SM = 3;
...
@@ -222,12 +222,12 @@ static const int GF1XX_BLOCKS_PER_SM = 3;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_NONBOND_THREADS_PER_BLOCK
=
320
;
static
const
int
GT2XX_NONBOND_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_NONBOND_THREADS_PER_BLOCK
=
768
;
//static const int GF1XX_NONBOND_THREADS_PER_BLOCK = 768;
//static const int GF1XX_NONBOND_THREADS_PER_BLOCK = 768;
static
const
int
G8X_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
=
320
;
static
const
int
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
=
768
;
//static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK = 768;
//static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK = 768;
static
const
int
G8X_SHAKE_THREADS_PER_BLOCK
=
128
;
static
const
int
G8X_SHAKE_THREADS_PER_BLOCK
=
128
;
...
@@ -252,7 +252,7 @@ static const int GF1XX_RANDOM_THREADS_PER_BLOCK = 768;
...
@@ -252,7 +252,7 @@ static const int GF1XX_RANDOM_THREADS_PER_BLOCK = 768;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GF1XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GF1XX_NONBOND_WORKUNITS_PER_SM
=
768
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
unsigned
int
MAX_TABULATED_FUNCTIONS
=
4
;
static
const
unsigned
int
MAX_TABULATED_FUNCTIONS
=
4
;
...
...
platforms/cuda/src/kernels/kBrownianUpdate.cu
View file @
734d5d67
...
@@ -51,7 +51,15 @@ void GetBrownianUpdateSim(gpuContext gpu)
...
@@ -51,7 +51,15 @@ void GetBrownianUpdateSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
}
__global__
void
kBrownianUpdatePart1_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kBrownianUpdatePart1_kernel
()
{
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
...
@@ -82,7 +90,15 @@ void kBrownianUpdatePart1(gpuContext gpu)
...
@@ -82,7 +90,15 @@ void kBrownianUpdatePart1(gpuContext gpu)
LAUNCHERROR
(
"kBrownianUpdatePart1"
);
LAUNCHERROR
(
"kBrownianUpdatePart1"
);
}
}
__global__
void
kBrownianUpdatePart2_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kBrownianUpdatePart2_kernel
()
{
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
734d5d67
...
@@ -33,7 +33,15 @@
...
@@ -33,7 +33,15 @@
/* Cuda compiler on Windows does not recognized "static const float" values */
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
734d5d67
...
@@ -33,7 +33,15 @@
...
@@ -33,7 +33,15 @@
/* Cuda compiler on Windows does not recognized "static const float" values */
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
...
...
platforms/cuda/src/kernels/kCalculateCustomTorsionForces.cu
View file @
734d5d67
...
@@ -118,7 +118,15 @@ void SetCustomTorsionGlobalParams(const vector<float>& paramValues)
...
@@ -118,7 +118,15 @@ void SetCustomTorsionGlobalParams(const vector<float>& paramValues)
angle = (dp >= 0) ? angle : -angle; \
angle = (dp >= 0) ? angle : -angle; \
}
}
__global__
void
kCalculateCustomTorsionForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#endif
void
kCalculateCustomTorsionForces_kernel
()
{
{
extern
__shared__
float
stack
[];
extern
__shared__
float
stack
[];
float
*
variables
=
(
float
*
)
&
stack
[
cSim
.
customExpressionStackSize
*
blockDim
.
x
];
float
*
variables
=
(
float
*
)
&
stack
[
cSim
.
customExpressionStackSize
*
blockDim
.
x
];
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
View file @
734d5d67
...
@@ -37,7 +37,15 @@
...
@@ -37,7 +37,15 @@
#include "kCalculateGBVIAux.h"
#include "kCalculateGBVIAux.h"
__global__
void
METHOD_NAME
(
kCalculateGBVI
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateGBVI
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
...
...
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
View file @
734d5d67
...
@@ -37,7 +37,15 @@
...
@@ -37,7 +37,15 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
__global__
void
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#endif
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
...
...
platforms/cuda/src/kernels/kCalculateLocalForces.cu
View file @
734d5d67
...
@@ -123,7 +123,15 @@ void GetCalculateLocalForcesSim(gpuContext gpu)
...
@@ -123,7 +123,15 @@ void GetCalculateLocalForcesSim(gpuContext gpu)
}
}
__global__
void
kCalculateLocalForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_LOCALFORCES_THREADS_PER_BLOCK
,
1
)
#endif
void
kCalculateLocalForces_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
Vectors
*
A
=
&
sV
[
threadIdx
.
x
];
Vectors
*
A
=
&
sV
[
threadIdx
.
x
];
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
734d5d67
...
@@ -97,7 +97,9 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
...
@@ -97,7 +97,9 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
#include "kCalculateObcGbsaBornSum.h"
#include "kCalculateObcGbsaBornSum.h"
__global__
void
kClearObcGbsaBornSum_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kClearObcGbsaBornSum_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
...
@@ -107,7 +109,9 @@ __global__ void kClearObcGbsaBornSum_kernel()
...
@@ -107,7 +109,9 @@ __global__ void kClearObcGbsaBornSum_kernel()
}
}
}
}
__global__
void
kReduceObcGbsaBornSum_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kReduceObcGbsaBornSum_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
734d5d67
...
@@ -30,7 +30,15 @@
...
@@ -30,7 +30,15 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
/*
/*
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
View file @
734d5d67
...
@@ -30,7 +30,15 @@
...
@@ -30,7 +30,15 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
...
...
platforms/cuda/src/kernels/kForces.cu
View file @
734d5d67
...
@@ -53,7 +53,9 @@ void GetForcesSim(gpuContext gpu)
...
@@ -53,7 +53,9 @@ void GetForcesSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
}
}
__global__
void
kClearForces_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kClearForces_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
...
@@ -70,7 +72,9 @@ void kClearForces(gpuContext gpu)
...
@@ -70,7 +72,9 @@ void kClearForces(gpuContext gpu)
LAUNCHERROR
(
"kClearForces"
);
LAUNCHERROR
(
"kClearForces"
);
}
}
__global__
void
kClearBornForces_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kClearBornForces_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
...
@@ -87,7 +91,9 @@ void kClearBornForces(gpuContext gpu)
...
@@ -87,7 +91,9 @@ void kClearBornForces(gpuContext gpu)
LAUNCHERROR
(
"kClearBornForces"
);
LAUNCHERROR
(
"kClearBornForces"
);
}
}
__global__
void
kClearEnergy_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kClearEnergy_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
energyOutputBuffers
)
while
(
pos
<
cSim
.
energyOutputBuffers
)
...
@@ -104,7 +110,15 @@ void kClearEnergy(gpuContext gpu)
...
@@ -104,7 +110,15 @@ void kClearEnergy(gpuContext gpu)
LAUNCHERROR
(
"kClearEnergy"
);
LAUNCHERROR
(
"kClearEnergy"
);
}
}
__global__
void
kReduceBornSumAndForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceBornSumAndForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
@@ -207,7 +221,15 @@ void kReduceBornSumAndForces(gpuContext gpu)
...
@@ -207,7 +221,15 @@ void kReduceBornSumAndForces(gpuContext gpu)
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
}
}
__global__
void
kReduceForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
@@ -269,7 +291,15 @@ double kReduceEnergy(gpuContext gpu)
...
@@ -269,7 +291,15 @@ double kReduceEnergy(gpuContext gpu)
return
sum
;
return
sum
;
}
}
__global__
void
kReduceObcGbsaBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaBornForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
float
energy
=
0.0
f
;
...
@@ -327,7 +357,15 @@ __global__ void kReduceObcGbsaBornForces_kernel()
...
@@ -327,7 +357,15 @@ __global__ void kReduceObcGbsaBornForces_kernel()
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
}
}
__global__
void
kReduceGBVIBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
float
energy
=
0.0
f
;
...
...
platforms/cuda/src/kernels/kLangevinUpdate.cu
View file @
734d5d67
...
@@ -101,7 +101,15 @@ void kLangevinUpdatePart2(gpuContext gpu)
...
@@ -101,7 +101,15 @@ void kLangevinUpdatePart2(gpuContext gpu)
}
}
__global__
void
kSelectLangevinStepSize_kernel
(
float
maxStepSize
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kSelectLangevinStepSize_kernel
(
float
maxStepSize
)
{
{
// Calculate the error.
// Calculate the error.
...
...
platforms/cuda/src/kernels/kLangevinUpdate.h
View file @
734d5d67
...
@@ -31,13 +31,21 @@
...
@@ -31,13 +31,21 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#ifdef REMOVE_CM
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart1CM_kernel
()
void
kLangevinUpdatePart1CM_kernel
()
#else
#else
__global__
void
kLangevinUpdatePart1_kernel
()
void
kLangevinUpdatePart1_kernel
()
#endif
#endif
{
{
__shared__
float
params
[
MaxParams
];
__shared__
volatile
float
params
[
MaxParams
];
if
(
threadIdx
.
x
<
MaxParams
)
if
(
threadIdx
.
x
<
MaxParams
)
params
[
threadIdx
.
x
]
=
cSim
.
pLangevinParameters
[
threadIdx
.
x
];
params
[
threadIdx
.
x
]
=
cSim
.
pLangevinParameters
[
threadIdx
.
x
];
__syncthreads
();
__syncthreads
();
...
@@ -127,10 +135,18 @@ __global__ void kLangevinUpdatePart1_kernel()
...
@@ -127,10 +135,18 @@ __global__ void kLangevinUpdatePart1_kernel()
}
}
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#ifdef REMOVE_CM
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart2CM_kernel
()
void
kLangevinUpdatePart2CM_kernel
()
#else
#else
__global__
void
kLangevinUpdatePart2_kernel
()
void
kLangevinUpdatePart2_kernel
()
#endif
#endif
{
{
__shared__
float
params
[
MaxParams
];
__shared__
float
params
[
MaxParams
];
...
...
platforms/cuda/src/kernels/kRandom.cu
View file @
734d5d67
...
@@ -54,7 +54,15 @@ void GetRandomSim(gpuContext gpu)
...
@@ -54,7 +54,15 @@ void GetRandomSim(gpuContext gpu)
extern
__shared__
float3
sRand
[];
extern
__shared__
float3
sRand
[];
__global__
void
kGenerateRandoms_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_RANDOM_THREADS_PER_BLOCK
,
1
)
#endif
void
kGenerateRandoms_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
increment
=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
increment
=
blockDim
.
x
*
gridDim
.
x
;
...
...
platforms/cuda/src/kernels/kSettle.cu
View file @
734d5d67
...
@@ -57,7 +57,15 @@ void GetSettleSim(gpuContext gpu)
...
@@ -57,7 +57,15 @@ void GetSettleSim(gpuContext gpu)
* S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992).
* S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992).
*/
*/
__global__
void
kApplyFirstSettle_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_SHAKE_THREADS_PER_BLOCK
,
1
)
#endif
void
kApplyFirstSettle_kernel
()
{
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
while
(
pos
<
cSim
.
settleConstraints
)
...
@@ -231,7 +239,15 @@ void kApplyFirstSettle(gpuContext gpu)
...
@@ -231,7 +239,15 @@ void kApplyFirstSettle(gpuContext gpu)
}
}
}
}
__global__
void
kApplySecondSettle_kernel
()
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_SHAKE_THREADS_PER_BLOCK
,
1
)
#endif
kApplySecondSettle_kernel
()
{
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
while
(
pos
<
cSim
.
settleConstraints
)
...
...
platforms/cuda/src/kernels/kShakeH.cu
View file @
734d5d67
...
@@ -64,9 +64,23 @@ void GetShakeHSim(gpuContext gpu)
...
@@ -64,9 +64,23 @@ void GetShakeHSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
}
__global__
void
kApplyFirstShake_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_SHAKE_THREADS_PER_BLOCK
,
1
)
#endif
void
kApplyFirstShake_kernel
()
{
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
#if (__CUDA_ARCH__ >= 200)
__shared__
Atom
sA
[
GF1XX_SHAKE_THREADS_PER_BLOCK
];
#elif (__CUDA_ARCH__ >= 130)
__shared__
Atom
sA
[
GT2XX_SHAKE_THREADS_PER_BLOCK
];
#else
__shared__
Atom
sA
[
G8X_SHAKE_THREADS_PER_BLOCK
];
#endif
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ShakeConstraints
)
while
(
pos
<
cSim
.
ShakeConstraints
)
...
@@ -226,9 +240,23 @@ void kApplyFirstShake(gpuContext gpu)
...
@@ -226,9 +240,23 @@ void kApplyFirstShake(gpuContext gpu)
}
}
}
}
__global__
void
kApplySecondShake_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_SHAKE_THREADS_PER_BLOCK
,
1
)
#endif
void
kApplySecondShake_kernel
()
{
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
#if (__CUDA_ARCH__ >= 200)
__shared__
Atom
sA
[
GF1XX_SHAKE_THREADS_PER_BLOCK
];
#elif (__CUDA_ARCH__ >= 130)
__shared__
Atom
sA
[
GT2XX_SHAKE_THREADS_PER_BLOCK
];
#else
__shared__
Atom
sA
[
G8X_SHAKE_THREADS_PER_BLOCK
];
#endif
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ShakeConstraints
)
while
(
pos
<
cSim
.
ShakeConstraints
)
...
@@ -394,7 +422,15 @@ __global__ void kApplySecondShake_kernel()
...
@@ -394,7 +422,15 @@ __global__ void kApplySecondShake_kernel()
}
}
}
}
__global__
void
kApplyNoShake_kernel
()
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_SHAKE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_SHAKE_THREADS_PER_BLOCK
,
1
)
#endif
kApplyNoShake_kernel
()
{
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
NonShakeConstraints
)
while
(
pos
<
cSim
.
NonShakeConstraints
)
...
...
platforms/cuda/src/kernels/kVerletUpdate.cu
View file @
734d5d67
...
@@ -89,7 +89,15 @@ void kVerletUpdatePart2(gpuContext gpu)
...
@@ -89,7 +89,15 @@ void kVerletUpdatePart2(gpuContext gpu)
}
}
}
}
__global__
void
kSelectVerletStepSize_kernel
(
float
maxStepSize
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kSelectVerletStepSize_kernel
(
float
maxStepSize
)
{
{
// Calculate the error.
// Calculate the error.
...
...
platforms/cuda/src/kernels/kVerletUpdate.h
View file @
734d5d67
...
@@ -30,15 +30,23 @@
...
@@ -30,15 +30,23 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#ifdef REMOVE_CM
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart1CM_kernel
()
void
kVerletUpdatePart1CM_kernel
()
#else
#else
__global__
void
kVerletUpdatePart1_kernel
()
void
kVerletUpdatePart1_kernel
()
#endif
#endif
{
{
// Load the step size to take.
// Load the step size to take.
__shared__
float
dtPos
;
__shared__
volatile
float
dtPos
;
__shared__
float
dtVel
;
__shared__
volatile
float
dtVel
;
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
{
{
float2
stepSize
=
cSim
.
pStepSize
[
0
];
float2
stepSize
=
cSim
.
pStepSize
[
0
];
...
@@ -111,10 +119,18 @@ __global__ void kVerletUpdatePart1_kernel()
...
@@ -111,10 +119,18 @@ __global__ void kVerletUpdatePart1_kernel()
}
}
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#ifdef REMOVE_CM
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart2CM_kernel
()
void
kVerletUpdatePart2CM_kernel
()
#else
#else
__global__
void
kVerletUpdatePart2_kernel
()
void
kVerletUpdatePart2_kernel
()
#endif
#endif
{
{
// Load the step size to take.
// Load the step size to take.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment