Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0541e749
Commit
0541e749
authored
Aug 08, 2009
by
Peter Eastman
Browse files
Simplified and optimized calculation of energies
parent
a534f1b2
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
93 additions
and
213 deletions
+93
-213
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+6
-21
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+7
-7
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+8
-4
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+5
-2
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+1
-1
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+9
-30
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+5
-7
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+7
-24
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+4
-5
platforms/cuda/src/kernels/kCalculateLocalForces.cu
platforms/cuda/src/kernels/kCalculateLocalForces.cu
+13
-30
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+26
-75
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
+2
-1
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
...ms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+0
-6
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
0541e749
...
...
@@ -41,7 +41,6 @@ using namespace std;
static
void
calcForces
(
ContextImpl
&
context
,
CudaPlatform
::
PlatformData
&
data
)
{
_gpuContext
*
gpu
=
data
.
gpu
;
gpu
->
bReduceEnergies
=
false
;
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
computeForceCount
%
100
==
0
)
gpuReorderAtoms
(
gpu
);
data
.
computeForceCount
++
;
...
...
@@ -88,37 +87,23 @@ static double calcEnergy(ContextImpl& context, CudaPlatform::PlatformData& data,
}
else
{
double
totalEnergy
=
0.0
f
;
double
energy
;
gpu
->
bReduceEnergies
=
true
;
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
stepCount
%
100
==
0
)
gpuReorderAtoms
(
gpu
);
data
.
stepCount
++
;
kClear
Forces
(
gpu
);
kClear
Energy
(
gpu
);
if
(
gpu
->
bIncludeGBSA
)
{
gpu
->
bRecalculateBornRadii
=
true
;
energy
=
kCalculateCDLJObcGbsaForces1
(
gpu
);
// printf("Calculated CDLJObcGbsa energy: %f\n", energy);
totalEnergy
+=
energy
;
energy
=
kReduceObcGbsaBornForces
(
gpu
);
// printf("Calculated reduced GbsaBorn surface area energy: %f\n", energy);
totalEnergy
+=
energy
;
kCalculateCDLJObcGbsaForces1
(
gpu
);
kReduceObcGbsaBornForces
(
gpu
);
kCalculateObcGbsaForces2
(
gpu
);
}
else
if
(
data
.
hasNonbonded
)
{
energy
=
kCalculateCDLJForces
(
gpu
);
// printf("Calculated CDLJ energy: %f\n", energy);
totalEnergy
+=
energy
;
kCalculateCDLJForces
(
gpu
);
}
energy
=
kCalculateLocalForces
(
gpu
);
// printf("Calculated local interactions energy: %f\n", energy);
totalEnergy
+=
energy
;
kCalculateLocalForces
(
gpu
);
if
(
gpu
->
bIncludeGBSA
)
kReduceBornSumAndForces
(
gpu
);
else
kReduceForces
(
gpu
);
// printf("Total GPU energies: %f\n", totalEnergy);
return
totalEnergy
;
return
kReduceEnergy
(
gpu
);
}
return
0.0
f
;
}
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
0541e749
...
...
@@ -28,17 +28,19 @@
// Initialization
extern
void
kClearForces
(
gpuContext
gpu
);
extern
void
kClearEnergy
(
gpuContext
gpu
);
extern
void
kClearBornForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kGenerateRandoms
(
gpuContext
gpu
);
// Main loop
extern
double
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
);
extern
double
kCalculateCDLJForces
(
gpuContext
gpu
);
extern
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
);
extern
void
kCalculateCDLJForces
(
gpuContext
gpu
);
extern
void
kCalculateCustomNonbondedForces
(
gpuContext
gpu
,
bool
neighborListValid
);
extern
double
kReduceObcGbsaBornForces
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces2
(
gpuContext
gpu
);
extern
double
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateAndersenThermostat
(
gpuContext
gpu
);
extern
void
kReduceBornSumAndForces
(
gpuContext
gpu
);
extern
void
kApplyFirstShake
(
gpuContext
gpu
);
...
...
@@ -58,9 +60,7 @@ extern void kBrownianUpdatePart2(gpuContext gpu);
// Extras
extern
void
kReduceForces
(
gpuContext
gpu
);
extern
void
kReduceLocalEnergies
(
gpuContext
gpu
,
float
*
gpu_energies
);
extern
double
kReduceNonbondEnergies
(
gpuContext
gpu
,
float
*
gpu_energies
);
extern
void
kClearBornForces
(
gpuContext
gpu
);
extern
double
kReduceEnergy
(
gpuContext
gpu
);
// Initializers
extern
void
SetCalculateCDLJObcGbsaForces1Sim
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
0541e749
...
...
@@ -232,6 +232,9 @@ static const int GT2XX_RANDOM_THREADS_PER_BLOCK = 384;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
float
PI
=
3.14159265358979323846
f
;
enum
CudaNonbondedMethod
{
NO_CUTOFF
,
...
...
@@ -297,9 +300,10 @@ struct cudaGmxSimulation {
unsigned
int
stride2
;
// Atomic attributes stride x 2
unsigned
int
stride3
;
// Atomic attributes stride x 3
unsigned
int
stride4
;
// Atomic attributes stride x 4
unsigned
int
nonbondOutputBuffers
;
// Nonbond output buffers per nonbond call
unsigned
int
nonbondOutputBuffers
;
// Nonbond output buffers per nonbond call
unsigned
int
totalNonbondOutputBuffers
;
// Total nonbond output buffers
unsigned
int
outputBuffers
;
// Number of output buffers
unsigned
int
energyOutputBuffers
;
// Number of energy output buffers
float
bigFloat
;
// Floating point value used as a flag for Shaken atoms
float
epsfac
;
// Epsilon factor for CDLJ calculations
CudaNonbondedMethod
nonbondedMethod
;
// How to handle nonbonded interactions
...
...
@@ -414,10 +418,10 @@ struct cudaGmxSimulation {
float4
*
pForce4
;
// Pointer to all force4 data
float4
*
pForce4a
;
// Pointer to first set of force4 data
float4
*
pForce4b
;
// Pointer to second set of force4 data
float
4
*
pOutForce4
;
// Pointer to
output float4 force
float
*
pEnergy
;
// Pointer to
energy output buffer
float
*
pBornForce
;
// Pointer to Born force data
float
*
pBornSum
;
// Pointer to Born Radii calculation output buffers
float
*
pBornRadii
;
// Pointer to Born Radii
float
*
pBornSum
;
// Pointer to Born Radii calculation output buffers
float
*
pBornRadii
;
// Pointer to Born Radii
float
*
pObcChain
;
// Pointer to OBC chain data
float4
*
pLinearMomentum
;
// Pointer to linear momentum
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
0541e749
...
...
@@ -106,7 +106,6 @@ struct Molecule {
};
static
const
float
dielectricOffset
=
0.009
f
;
static
const
float
PI
=
3.1415926535
f
;
static
const
float
probeRadius
=
0.14
f
;
static
const
float
forceConversionFactor
=
0.4184
f
;
...
...
@@ -1444,12 +1443,12 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
bRemoveCM
=
false
;
gpu
->
bRecalculateBornRadii
=
true
;
gpu
->
bIncludeGBSA
=
false
;
gpu
->
bReduceEnergies
=
true
;
gpuInitializeRandoms
(
gpu
);
// To be determined later
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psForce4
=
NULL
;
gpu
->
psEnergy
=
NULL
;
gpu
->
sim
.
pForce4
=
NULL
;
gpu
->
sim
.
pForce4a
=
NULL
;
gpu
->
sim
.
pForce4b
=
NULL
;
...
...
@@ -1619,6 +1618,7 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psOldPosq4
;
delete
gpu
->
psVelm4
;
delete
gpu
->
psForce4
;
delete
gpu
->
psEnergy
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
...
...
@@ -1713,12 +1713,15 @@ int gpuBuildOutputBuffers(gpuContext gpu)
}
}
gpu
->
sim
.
outputBuffers
=
outputBuffers
;
gpu
->
sim
.
energyOutputBuffers
=
max
(
gpu
->
sim
.
nonbond_threads_per_block
,
gpu
->
sim
.
localForces_threads_per_block
)
*
gpu
->
sim
.
blocks
;
gpu
->
psForce4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
outputBuffers
,
"Force"
);
gpu
->
psEnergy
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
energyOutputBuffers
,
1
,
"Energy"
);
gpu
->
psBornForce
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
gpu
->
sim
.
nonbondOutputBuffers
,
"BornForce"
);
gpu
->
psBornSum
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
gpu
->
sim
.
nonbondOutputBuffers
,
"BornSum"
);
gpu
->
sim
.
pForce4
=
gpu
->
psForce4
->
_pDevStream
[
0
];
gpu
->
sim
.
pForce4a
=
gpu
->
sim
.
pForce4
;
gpu
->
sim
.
pForce4b
=
gpu
->
sim
.
pForce4
+
1
*
gpu
->
sim
.
nonbondOutputBuffers
*
gpu
->
sim
.
stride
;
gpu
->
sim
.
pEnergy
=
gpu
->
psEnergy
->
_pDevStream
[
0
];
gpu
->
sim
.
pBornForce
=
gpu
->
psBornForce
->
_pDevStream
[
0
];
gpu
->
sim
.
pBornSum
=
gpu
->
psBornSum
->
_pDevStream
[
0
];
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
0541e749
...
...
@@ -77,7 +77,6 @@ struct _gpuContext {
bool
bRecalculateBornRadii
;
bool
bOutputBufferPerWarp
;
bool
bIncludeGBSA
;
bool
bReduceEnergies
;
unsigned
long
seed
;
SM_VERSION
sm_version
;
CUDPPHandle
cudpp
;
...
...
@@ -86,6 +85,7 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psOldPosq4
;
CUDAStream
<
float4
>*
psVelm4
;
CUDAStream
<
float4
>*
psForce4
;
CUDAStream
<
float
>*
psEnergy
;
// Energy output buffer
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
0541e749
...
...
@@ -122,25 +122,19 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#include "kCalculateCDLJEwaldFastReciprocal.h"
__global__
extern
void
kCalculateCDLJCutoffForces_12_kernel
();
double
kCalculateCDLJForces
(
gpuContext
gpu
)
void
kCalculateCDLJForces
(
gpuContext
gpu
)
{
// printf("kCalculateCDLJCutoffForces\n");
unsigned
int
N
=
gpu
->
sim
.
nonbond_blocks
*
gpu
->
sim
.
nonbond_threads_per_block
;
float
*
energies_dev
;
cudaMalloc
((
void
**
)
&
energies_dev
,
sizeof
(
float
)
*
N
);
CUDPPResult
result
;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
energies_dev
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
energies_dev
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJN2Forces"
);
break
;
case
CUTOFF
:
...
...
@@ -159,10 +153,10 @@ double kCalculateCDLJForces(gpuContext gpu)
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJCutoffForces"
);
break
;
case
PERIODIC
:
...
...
@@ -181,10 +175,10 @@ double kCalculateCDLJForces(gpuContext gpu)
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJPeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJPeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
break
;
case
EWALD
:
...
...
@@ -203,10 +197,10 @@ double kCalculateCDLJForces(gpuContext gpu)
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJEwaldDirectByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
if
(
false
)
{
...
...
@@ -224,19 +218,4 @@ double kCalculateCDLJForces(gpuContext gpu)
}
}
double
sum
=
0.0
f
;
if
(
gpu
->
bReduceEnergies
)
{
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
memset
(
energies
,
0
,
sizeof
(
float
)
*
N
);
cudaMemcpy
(
energies
,
energies_dev
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
/* printf("\n");
for (int i = 0; i < N; i++) printf("\t%f", energies[i]);
printf("\n");
*/
}
cudaFree
(
energies_dev
);
return
sum
;
}
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
0541e749
...
...
@@ -30,7 +30,7 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float
*
gpu_energies
)
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
...
...
@@ -38,7 +38,6 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
energyIndex
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
float
CDLJ_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
...
...
@@ -46,8 +45,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif
#ifdef USE_EWALD
float
PI
=
3
.
14159265358979323846
f
;
float
SQRT_PI
=
sqrt
(
PI
);
const
float
SQRT_PI
=
sqrt
(
PI
);
#endif
unsigned
int
lasty
=
0xFFFFFFFF
;
...
...
@@ -140,7 +138,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
}
#endif
/* E */
energy
+=
CDLJ_energy
/
2
.
0
;
energy
+=
0
.
5
f
*
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
...
...
@@ -214,7 +212,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
CDLJ_energy
=
0
.
0
f
;
}
/* E */
energy
+=
CDLJ_energy
/
2
.
0
;
energy
+=
0
.
5
f
*
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
...
...
@@ -541,5 +539,5 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
pos
++
;
}
gpu_energies
[
energyInde
x
]
=
energy
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
energy
;
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
0541e749
...
...
@@ -104,14 +104,10 @@ extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
double
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
unsigned
int
N
=
gpu
->
sim
.
nonbond_blocks
*
gpu
->
sim
.
nonbond_threads_per_block
;
float
*
energies_dev
;
cudaMalloc
((
void
**
)
&
energies_dev
,
sizeof
(
float
)
*
N
);
// check if Born radii need to be calculated
kClearBornForces
(
gpu
);
...
...
@@ -126,10 +122,10 @@ double kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaN2ByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
energies_dev
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJObcGbsaN2Forces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
energies_dev
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaN2Forces1"
);
break
;
case
CUTOFF
:
...
...
@@ -153,10 +149,10 @@ double kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaCutoffForces1"
);
break
;
case
PERIODIC
:
...
...
@@ -180,24 +176,11 @@ double kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
energies_dev
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaPeriodicForces1"
);
break
;
}
double
sum
=
0.0
f
;
if
(
gpu
->
bReduceEnergies
)
{
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
// double sum = kReduceNonbondEnergies(gpu, energies_dev);
// printf("GPU____Sum: %f\n", sum);
cudaMemcpy
(
energies
,
energies_dev
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
// printf("ControlSum: %f\n", sum);
}
cudaFree
(
energies_dev
);
return
sum
;
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
0541e749
...
...
@@ -30,7 +30,7 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
,
float
*
gpu_energies
)
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
...
...
@@ -38,7 +38,6 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
energyIndex
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
float
CDLJObcGbsa_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
...
...
@@ -138,7 +137,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
if
(
i
<
cSim
.
atoms
)
{
energy
+=
CDLJObcGbsa_energy
/
2
.
0
f
;
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
}
// Add Forces
dx
*=
dEdR
;
...
...
@@ -224,7 +223,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
if
(
i
<
cSim
.
atoms
)
{
energy
+=
CDLJObcGbsa_energy
/
2
.
0
f
;
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
}
// Add Forces
dx
*=
dEdR
;
...
...
@@ -615,5 +614,5 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
}
pos
++
;
}
gpu_energies
[
energyInde
x
]
=
energy
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
energy
;
}
platforms/cuda/src/kernels/kCalculateLocalForces.cu
View file @
0541e749
...
...
@@ -59,14 +59,14 @@ static __constant__ cudaGmxSimulation cSim;
#define GETPREFACTORSGIVENANGLECOSINE(cosine, param, dEdR) \
{
\
float
angle
=
acos
(
cosine
);
\
float
deltaIdeal
=
angle
-
(
param
.
x
*
(
3.14159265
f
/
180.0
f
));
\
float
deltaIdeal
=
angle
-
(
param
.
x
*
(
PI
/
180.0
f
));
\
dEdR
=
param
.
y
*
deltaIdeal
;
\
}
#define GETENERGYGIVENANGLECOSINE(cosine, param, dEdR) \
{
\
float
angle
=
acos
(
cosine
);
\
float
deltaIdeal
=
angle
-
(
param
.
x
*
(
3.14159265
f
/
180.0
f
));
\
float
deltaIdeal
=
angle
-
(
param
.
x
*
(
PI
/
180.0
f
));
\
dEdR
=
param
.
y
*
deltaIdeal
*
deltaIdeal
;
\
}
...
...
@@ -116,10 +116,9 @@ void GetCalculateLocalForcesSim(gpuContext gpu)
}
__global__
void
kCalculateLocalForces_kernel
(
float
*
gpu_energies
)
__global__
void
kCalculateLocalForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
energyIndex
=
pos
;
Vectors
*
A
=
&
sV
[
threadIdx
.
x
];
float
energy
=
0.0
f
;
...
...
@@ -138,7 +137,7 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
float
deltaIdeal
=
r
-
bond
.
x
;
/* E */
energy
+=
bond
.
y
*
deltaIdeal
*
deltaIdeal
/
2.0
;
/* E */
energy
+=
0.5
f
*
bond
.
y
*
deltaIdeal
*
deltaIdeal
;
float
dEdR
=
bond
.
y
*
deltaIdeal
;
dEdR
=
(
r
>
0.0
f
)
?
(
dEdR
/
r
)
:
0.0
f
;
// printf("D: %11.4f %11.4f %11.4f %11.4f %11.4f %11.4f\n", dx, dy, dz, r, deltaIdeal, dEdR);
...
...
@@ -188,7 +187,7 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
float
angle_energy
;
/* E */
GETENERGYGIVENANGLECOSINE
(
cosine
,
bond_angle
,
angle_energy
);
energy
+=
angle_energy
/
2.0
;
energy
+=
0.5
f
*
angle_energy
;
float
dEdR
;
GETPREFACTORSGIVENANGLECOSINE
(
cosine
,
bond_angle
,
dEdR
);
...
...
@@ -251,7 +250,7 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
float
dihedralAngle
;
GETDIHEDRALANGLEBETWEENTHREEVECTORS
(
A
->
v0
,
A
->
v1
,
A
->
v2
,
A
->
v0
,
cp0
,
cp1
,
dihedralAngle
);
float4
dihedral
=
cSim
.
pDihedralParameter
[
pos1
];
float
deltaAngle
=
dihedral
.
z
*
dihedralAngle
-
(
dihedral
.
y
*
3.14159265
f
/
180.0
f
);
float
deltaAngle
=
dihedral
.
z
*
dihedralAngle
-
(
dihedral
.
y
*
PI
/
180.0
f
);
// ATTENTION: This section leads to a divergent deltaAngle values wrt
// forces and energies. We separate the case dihedral.z = n = 0, which
...
...
@@ -260,8 +259,8 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
/* E */
else
{
float
deltaAngle
=
dihedralAngle
-
dihedral
.
y
;
if
(
deltaAngle
<
-
M_
PI
)
deltaAngle
+=
2.0
*
M_
PI
;
else
if
(
deltaAngle
>
M_
PI
)
deltaAngle
-=
2.0
*
M_
PI
;
if
(
deltaAngle
<
-
PI
)
deltaAngle
+=
2.0
f
*
PI
;
else
if
(
deltaAngle
>
PI
)
deltaAngle
-=
2.0
f
*
PI
;
energy
+=
dihedral
.
x
*
deltaAngle
*
deltaAngle
;
}
...
...
@@ -354,11 +353,11 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
GETDIHEDRALANGLECOSINEBETWEENTHREEVECTORS
(
A
->
v0
,
A
->
v1
,
A
->
v2
,
A
->
v0
,
cp0
,
cp1
,
dihedralAngle
,
cosPhi
);
if
(
dihedralAngle
<
0.0
f
)
{
dihedralAngle
+=
3.14159265
f
;
dihedralAngle
+=
PI
;
}
else
{
dihedralAngle
-=
3.14159265
f
;
dihedralAngle
-=
PI
;
}
cosPhi
=
-
cosPhi
;
// printf("%4d: %9.4f %9.4f\n", pos1, dihedralAngle, cosPhi);
...
...
@@ -607,30 +606,14 @@ __global__ void kCalculateLocalForces_kernel(float * gpu_energies)
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
gpu_energies
[
energyInde
x
]
=
energy
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
energy
;
}
double
kCalculateLocalForces
(
gpuContext
gpu
)
void
kCalculateLocalForces
(
gpuContext
gpu
)
{
// printf("kCalculateLocalForces\n");
unsigned
int
N
=
gpu
->
sim
.
blocks
*
gpu
->
sim
.
localForces_threads_per_block
;
float
*
energies_dev
;
cudaMalloc
((
void
**
)
&
energies_dev
,
sizeof
(
float
)
*
N
);
kCalculateLocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
localForces_threads_per_block
,
gpu
->
sim
.
localForces_threads_per_block
*
sizeof
(
Vectors
)
>>>
(
energies_dev
);
kCalculateLocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
localForces_threads_per_block
,
gpu
->
sim
.
localForces_threads_per_block
*
sizeof
(
Vectors
)
>>>
();
LAUNCHERROR
(
"kCalculateLocalForces"
);
double
sum
=
0.0
f
;
if
(
gpu
->
bReduceEnergies
)
{
// sum = kReduceLocalEnergies(gpu, energies_dev);
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
memset
(
energies
,
0
,
sizeof
(
float
)
*
N
);
cudaMemcpy
(
energies
,
energies_dev
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
}
cudaFree
(
energies_dev
);
return
sum
;
}
platforms/cuda/src/kernels/kForces.cu
View file @
0541e749
...
...
@@ -87,6 +87,23 @@ void kClearBornForces(gpuContext gpu)
LAUNCHERROR
(
"kClearBornForces"
);
}
__global__
void
kClearEnergy_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
energyOutputBuffers
)
{
((
float
*
)
cSim
.
pEnergy
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearEnergy
(
gpuContext
gpu
)
{
// printf("kClearEnergy\n");
kClearEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearEnergy"
);
}
__global__
void
kReduceBornSumAndForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -188,20 +205,6 @@ void kReduceBornSumAndForces(gpuContext gpu)
//printf("kReduceBornSumAndForces\n");
kReduceBornSumAndForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
#if 0
//gpuDumpObcLoop1( gpu );
/*
gpu->psForce4->Download();
for (int i = 0; i < gpu->natoms; i++)
{
printf("%4d: %12.6f %12.6f %12.6f\n", i,
gpu->psForce4->_pSysStream[0][i].x,
gpu->psForce4->_pSysStream[0][i].y,
gpu->psForce4->_pSysStream[0][i].z
);
} */
#endif
}
__global__
void
kReduceForces_kernel
()
...
...
@@ -254,56 +257,19 @@ void kReduceForces(gpuContext gpu)
LAUNCHERROR
(
"kReduceForces"
);
}
__global__
void
kReduceEnergies_kernel
(
float
*
gpu_energies
)
{
// GPU energy reduction algorithm goes here
// unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
}
double
kReduceLocalEnergies
(
gpuContext
gpu
,
float
*
gpu_energies
)
double
kReduceEnergy
(
gpuContext
gpu
)
{
// printf("kReduceLocalEnergies\n");
unsigned
int
N
=
gpu
->
sim
.
blocks
*
gpu
->
sim
.
localForces_threads_per_block
;
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
//kReduceEnergies_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block>>>(gpu_energies);
LAUNCHERROR
(
"kReduceLocalEnergies"
);
cudaMemcpy
(
energies
,
gpu_energies
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
// printf("kReduceEnergy\n");
gpu
->
psEnergy
->
Download
();
double
sum
=
0.0
f
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
energyOutputBuffers
;
i
++
)
sum
+=
(
*
gpu
->
psEnergy
)[
i
];
return
sum
;
}
double
kReduceNonbondEnergies
(
gpuContext
gpu
,
float
*
gpu_energies
)
{
// printf("kReduceNonbondEnergies\n");
unsigned
int
N
=
gpu
->
sim
.
nonbond_blocks
*
gpu
->
sim
.
nonbond_threads_per_block
;
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
//kReduceEnergies_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>(gpu_energies);
LAUNCHERROR
(
"kReduceNonbondEnergies"
);
cudaMemcpy
(
energies
,
gpu_energies
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
double
sum
=
0.0
f
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
return
sum
;
}
double
kReduceObcGbsaBornEnergies
(
gpuContext
gpu
,
float
*
gpu_energies
)
{
// printf("kReduceObcGbsaBornEnergies\n");
unsigned
int
N
=
gpu
->
sim
.
blocks
*
gpu
->
sim
.
bf_reduce_threads_per_block
;
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
//kReduceEnergies_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>(gpu_energies);
LAUNCHERROR
(
"kReduceObcGbsaBornEnergies"
);
cudaMemcpy
(
energies
,
gpu_energies
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
double
sum
=
0.0
f
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
return
sum
;
}
__global__
void
kReduceObcGbsaBornForces_kernel
(
float
*
gpu_energies
)
__global__
void
kReduceObcGbsaBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
energyIndex
=
pos
;
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
...
...
@@ -357,29 +323,14 @@ __global__ void kReduceObcGbsaBornForces_kernel(float * gpu_energies)
}
/* E */
// correct for surface area factor of -6
gpu_energies
[
energyInde
x
]
=
energy
/
-
6.0
f
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
energy
/
-
6.0
f
;
}
double
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
{
unsigned
int
N
=
gpu
->
sim
.
blocks
*
gpu
->
sim
.
bf_reduce_threads_per_block
;
float
*
energies_dev
;
cudaMalloc
((
void
**
)
&
energies_dev
,
sizeof
(
float
)
*
N
);
//printf("kReduceObcGbsaBornForces\n");
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bf_reduce_threads_per_block
>>>
(
energies_dev
);
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaBornForces"
);
float
sum
=
0.0
f
;
if
(
gpu
->
bReduceEnergies
)
{
// double sum = kReduceObcGbsaBornEnergies(gpu, energies_dev);
float
*
energies
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
N
);
cudaMemcpy
(
energies
,
energies_dev
,
sizeof
(
float
)
*
N
,
cudaMemcpyDeviceToHost
);
for
(
int
i
=
0
;
i
<
N
;
i
++
)
sum
+=
energies
[
i
];
}
cudaFree
(
energies_dev
);
return
sum
;
}
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
View file @
0541e749
...
...
@@ -179,7 +179,8 @@ void testForce(int numParticles, NonbondedForce::NonbondedMethod method) {
}
norm
=
std
::
sqrt
(
norm
);
diff
=
std
::
sqrt
(
diff
);
ASSERT_EQUAL_TOL
(
0.0
,
diff
,
0.001
*
norm
);
ASSERT_EQUAL_TOL
(
0.0
,
diff
,
0.001
*
norm
);
ASSERT_EQUAL_TOL
(
state
.
getPotentialEnergy
(),
refState
.
getPotentialEnergy
(),
1e-3
);
// Take a small step in the direction of the energy gradient. (This doesn't work with cutoffs, since the energy
// changes discontinuously at the cutoff distance.)
...
...
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
View file @
0541e749
...
...
@@ -257,7 +257,6 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
if
(
totalEnergy
)
*
totalEnergy
-=
selfEwaldEnergy
;
// printf("- selfEwaldEnergy: %f aEwald %f p1 %f\n", -selfEwaldEnergy, alphaEwald, atomParameters[atomID][QIndex]);
if
(
energyByAtom
){
energyByAtom
[
atomID
]
-=
selfEwaldEnergy
;
...
...
@@ -267,7 +266,6 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
RealOpenMM
tempStore
=
*
totalEnergy
;
// PME
if
(
pme
)
{
...
...
@@ -410,8 +408,6 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
SimTKOpenMMLog
::
printError
(
message
);
}
// printf("recipEnergy: %f\n", *totalEnergy - tempStore);
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
...
...
@@ -451,13 +447,11 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// accumulate energies
realSpaceEwaldEnergy
=
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
*
erfc
(
alphaR
);
// printf("realSpaceEwald: %f p1 %f p2 %f invR %f erfc(aR) %f aR %f\n", realSpaceEwaldEnergy, atomParameters[ii][QIndex], atomParameters[jj][QIndex], inverseR, erfc(alphaR), alphaR);
vdwEnergy
=
eps
*
(
sig6
-
one
)
*
sig6
;
if
(
totalEnergy
)
*
totalEnergy
+=
realSpaceEwaldEnergy
+
vdwEnergy
;
if
(
energyByAtom
){
energyByAtom
[
ii
]
+=
realSpaceEwaldEnergy
+
vdwEnergy
;
energyByAtom
[
jj
]
+=
realSpaceEwaldEnergy
+
vdwEnergy
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment