Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
873552ba
Commit
873552ba
authored
Nov 16, 2012
by
Peter Eastman
Browse files
Deleted free energy plugin
parent
c775bd19
Changes
81
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
6074 deletions
+0
-6074
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernelFactory.cpp
...Energy/platforms/cuda/src/CudaFreeEnergyKernelFactory.cpp
+0
-120
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
...s/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
+0
-653
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.h
...ins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.h
+0
-204
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.cpp
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.cpp
+0
-141
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.h
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.h
+0
-108
plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
...rgy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
+0
-142
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaGpu.cpp
...eeEnergy/platforms/cuda/src/kernels/freeEnergyCudaGpu.cpp
+0
-512
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaTypes.h
...eeEnergy/platforms/cuda/src/kernels/freeEnergyCudaTypes.h
+0
-91
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyGpuTypes.h
...reeEnergy/platforms/cuda/src/kernels/freeEnergyGpuTypes.h
+0
-61
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
.../cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
+0
-177
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
...s/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
+0
-660
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
...atforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
+0
-603
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
...latforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
+0
-258
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
...atforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
+0
-158
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
...latforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
+0
-382
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
...atforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
+0
-290
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
...platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
+0
-177
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.h
.../platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.h
+0
-507
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
...orms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
+0
-472
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
...forms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
+0
-358
No files found.
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernelFactory.cpp
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaFreeEnergyKernelFactory.h"
#include "CudaFreeEnergyKernels.h"
#include "openmm/freeEnergyKernels.h"
#include "FreeEnergyCudaData.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
#include "kernels/GpuFreeEnergyCudaKernels.h"
using
namespace
OpenMM
;
extern
"C"
void
registerPlatforms
()
{
}
extern
"C"
void
registerKernelFactories
()
{
// (void) fprintf( stderr, "initOpenMMCudaFreeEnergyPlugin called\n");
if
(
gpuIsAvailableSoftcore
()
){
for
(
int
ii
=
0
;
ii
<
Platform
::
getNumPlatforms
();
ii
++
){
Platform
&
platform
=
Platform
::
getPlatform
(
ii
);
if
(
platform
.
getName
().
compare
(
"Cuda"
)
==
0
){
CudaFreeEnergyKernelFactory
*
factory
=
new
CudaFreeEnergyKernelFactory
();
platform
.
registerKernelFactory
(
CalcNonbondedSoftcoreForceKernel
::
Name
(),
factory
);
platform
.
registerKernelFactory
(
CalcGBSAOBCSoftcoreForceKernel
::
Name
(),
factory
);
platform
.
registerKernelFactory
(
CalcGBVISoftcoreForceKernel
::
Name
(),
factory
);
}
}
}
}
extern
"C"
OPENMMCUDA_EXPORT
void
registerFreeEnergyCudaKernelFactories
(
void
)
{
int
hasCudaPlatform
=
0
;
for
(
int
ii
=
0
;
ii
<
Platform
::
getNumPlatforms
()
&&
hasCudaPlatform
==
0
;
ii
++
){
Platform
&
platform
=
Platform
::
getPlatform
(
ii
);
if
(
platform
.
getName
()
==
"Cuda"
){
hasCudaPlatform
=
1
;
}
}
if
(
hasCudaPlatform
==
0
){
if
(
gpuIsAvailable
()
){
Platform
::
registerPlatform
(
new
CudaPlatform
());
}
}
registerKernelFactories
();
}
static
std
::
map
<
ContextImpl
*
,
FreeEnergyCudaData
*>
contextToFreeEnergyDataMap
;
// look up FreeEnergyCudaData for input contextImpl in contextToFreeEnergyDataMap
extern
"C"
void
*
getFreeEnergyCudaData
(
ContextImpl
&
context
)
{
std
::
map
<
ContextImpl
*
,
FreeEnergyCudaData
*>::
const_iterator
mapIterator
=
contextToFreeEnergyDataMap
.
find
(
&
context
);
if
(
mapIterator
==
contextToFreeEnergyDataMap
.
end
()
){
return
NULL
;
}
else
{
return
static_cast
<
void
*>
(
mapIterator
->
second
);
}
}
// remove FreeEnergyCudaData from contextToFreeEnergyDataMap
extern
"C"
void
removeFreeEnergyCudaDataFromContextMap
(
void
*
inputContext
)
{
ContextImpl
*
context
=
static_cast
<
ContextImpl
*>
(
inputContext
);
contextToFreeEnergyDataMap
.
erase
(
context
);
return
;
}
KernelImpl
*
CudaFreeEnergyKernelFactory
::
createKernelImpl
(
std
::
string
name
,
const
Platform
&
platform
,
ContextImpl
&
context
)
const
{
// create FreeEnergyCudaData object if contextToFreeEnergyDataMap does not contain
// key equal to current context
FreeEnergyCudaData
*
freeEnergyCudaData
;
std
::
map
<
ContextImpl
*
,
FreeEnergyCudaData
*>::
const_iterator
mapIterator
=
contextToFreeEnergyDataMap
.
find
(
&
context
);
if
(
mapIterator
==
contextToFreeEnergyDataMap
.
end
()
){
CudaPlatform
::
PlatformData
&
cudaPlatformData
=
*
static_cast
<
CudaPlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
freeEnergyCudaData
=
new
FreeEnergyCudaData
(
cudaPlatformData
);
contextToFreeEnergyDataMap
[
&
context
]
=
freeEnergyCudaData
;
//freeEnergyCudaData->setLog( stderr );
freeEnergyCudaData
->
setContextImpl
(
static_cast
<
void
*>
(
&
context
)
);
}
else
{
freeEnergyCudaData
=
mapIterator
->
second
;
}
if
(
name
==
CalcNonbondedSoftcoreForceKernel
::
Name
())
return
new
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
(
name
,
platform
,
*
freeEnergyCudaData
,
context
.
getSystem
());
if
(
name
==
CalcGBSAOBCSoftcoreForceKernel
::
Name
())
return
new
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
(
name
,
platform
,
*
freeEnergyCudaData
);
if
(
name
==
CalcGBVISoftcoreForceKernel
::
Name
())
return
new
CudaFreeEnergyCalcGBVISoftcoreForceKernel
(
name
,
platform
,
*
freeEnergyCudaData
);
throw
OpenMMException
(
(
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
()
);
}
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaFreeEnergyKernels.h"
#include "CudaForceInfo.h"
#include "openmm/Context.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/ContextImpl.h"
#include "kernels/gputypes.h"
#include "kernels/cudaKernels.h"
#include "kernels/GpuFreeEnergyCudaKernels.h"
#include <cmath>
#include <map>
#include <cstring>
#include <cstdlib>
#include <typeinfo>
using
namespace
OpenMM
;
typedef
std
::
map
<
std
::
string
,
int
>
MapStringInt
;
typedef
MapStringInt
::
iterator
MapStringIntI
;
typedef
MapStringInt
::
const_iterator
MapStringIntCI
;
// force names
const
std
::
string
HARMONIC_BOND_FORCE
=
"HarmonicBond"
;
const
std
::
string
HARMONIC_ANGLE_FORCE
=
"HarmonicBond"
;
const
std
::
string
PERIODIC_TORSION_FORCE
=
"PeriodicTorsion"
;
const
std
::
string
RB_TORSION_FORCE
=
"RbTorsion"
;
const
std
::
string
NB_FORCE
=
"Nb"
;
const
std
::
string
NB_SOFTCORE_FORCE
=
"NbSoftcore"
;
const
std
::
string
NB_EXCEPTION_FORCE
=
"NbException"
;
const
std
::
string
NB_EXCEPTION_SOFTCORE_FORCE
=
"NbSoftcoreException"
;
const
std
::
string
GBSA_OBC_FORCE
=
"Obc"
;
const
std
::
string
GBSA_OBC_SOFTCORE_FORCE
=
"ObcSoftcore"
;
const
std
::
string
GBVI_FORCE
=
"GBVI"
;
const
std
::
string
GBVI_SOFTCORE_FORCE
=
"GBVISoftcore"
;
static
void
getForceMap
(
const
System
&
system
,
MapStringInt
&
forceMap
,
FILE
*
log
)
{
// check forces and relevant parameters
for
(
int
i
=
0
;
i
<
system
.
getNumForces
();
++
i
)
{
int
hit
=
0
;
const
Force
&
force
=
system
.
getForce
(
i
);
std
::
string
forceName
=
"NA"
;
// bond
if
(
!
hit
){
try
{
const
HarmonicBondForce
&
harmonicBondForce
=
dynamic_cast
<
const
HarmonicBondForce
&>
(
force
);
forceMap
[
HARMONIC_BOND_FORCE
]
=
1
;
forceName
=
HARMONIC_BOND_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// angle
if
(
!
hit
){
try
{
const
HarmonicAngleForce
&
harmonicAngleForce
=
dynamic_cast
<
const
HarmonicAngleForce
&>
(
force
);
forceMap
[
HARMONIC_ANGLE_FORCE
]
=
1
;
forceName
=
HARMONIC_ANGLE_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// PeriodicTorsionForce
if
(
!
hit
){
try
{
const
PeriodicTorsionForce
&
periodicTorsionForce
=
dynamic_cast
<
const
PeriodicTorsionForce
&>
(
force
);
forceMap
[
PERIODIC_TORSION_FORCE
]
=
1
;
forceName
=
PERIODIC_TORSION_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// RBTorsionForce
if
(
!
hit
){
try
{
const
RBTorsionForce
&
rBTorsionForce
=
dynamic_cast
<
const
RBTorsionForce
&>
(
force
);
forceMap
[
RB_TORSION_FORCE
]
=
1
;
forceName
=
RB_TORSION_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// nonbonded
if
(
!
hit
){
try
{
const
NonbondedForce
&
nbForce
=
dynamic_cast
<
const
NonbondedForce
&>
(
force
);
forceMap
[
NB_FORCE
]
=
1
;
forceName
=
NB_FORCE
;
}
catch
(
std
::
bad_cast
){
}
}
// nonbonded softcore
if
(
!
hit
){
try
{
const
NonbondedSoftcoreForce
&
nbForce
=
dynamic_cast
<
const
NonbondedSoftcoreForce
&>
(
force
);
forceMap
[
NB_SOFTCORE_FORCE
]
=
1
;
forceName
=
NB_SOFTCORE_FORCE
;
}
catch
(
std
::
bad_cast
){
}
}
// GBSA OBC
if
(
!
hit
){
try
{
const
GBSAOBCForce
&
obcForce
=
dynamic_cast
<
const
GBSAOBCForce
&>
(
force
);
forceMap
[
GBSA_OBC_FORCE
]
=
1
;
forceName
=
GBSA_OBC_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// GBSA OBC softcore
if
(
!
hit
){
try
{
const
GBSAOBCSoftcoreForce
&
obcForce
=
dynamic_cast
<
const
GBSAOBCSoftcoreForce
&>
(
force
);
forceMap
[
GBSA_OBC_SOFTCORE_FORCE
]
=
1
;
forceName
=
GBSA_OBC_SOFTCORE_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// GB/VI
if
(
!
hit
){
try
{
const
GBVIForce
&
obcForce
=
dynamic_cast
<
const
GBVIForce
&>
(
force
);
forceMap
[
GBVI_FORCE
]
=
1
;
forceName
=
GBVI_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
// GB/VI softcore
if
(
!
hit
){
try
{
const
GBVISoftcoreForce
&
gbviForce
=
dynamic_cast
<
const
GBVISoftcoreForce
&>
(
force
);
forceMap
[
GBVI_SOFTCORE_FORCE
]
=
1
;
forceName
=
GBVI_SOFTCORE_FORCE
;
hit
++
;
}
catch
(
std
::
bad_cast
){
}
}
if
(
log
){
(
void
)
fprintf
(
log
,
"Map: Force %d %s
\n
"
,
i
,
forceName
.
c_str
()
);
}
}
}
class
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
ForceInfo
(
const
NonbondedSoftcoreForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
double
charge1
,
charge2
,
sigma1
,
sigma2
,
epsilon1
,
epsilon2
,
softcoreLJLambda1
,
softcoreLJLambda2
;
force
.
getParticleParameters
(
particle1
,
charge1
,
sigma1
,
epsilon1
,
softcoreLJLambda1
);
force
.
getParticleParameters
(
particle2
,
charge2
,
sigma2
,
epsilon2
,
softcoreLJLambda2
);
return
(
charge1
==
charge2
&&
sigma1
==
sigma2
&&
epsilon1
==
epsilon2
&&
softcoreLJLambda1
==
softcoreLJLambda2
);
}
int
getNumParticleGroups
()
{
return
force
.
getNumExceptions
();
}
void
getParticlesInGroup
(
int
index
,
std
::
vector
<
int
>&
particles
)
{
int
particle1
,
particle2
;
double
chargeProd
,
sigma
,
epsilon
,
softcoreLJLambda
;
force
.
getExceptionParameters
(
index
,
particle1
,
particle2
,
chargeProd
,
sigma
,
epsilon
,
softcoreLJLambda
);
particles
.
resize
(
2
);
particles
[
0
]
=
particle1
;
particles
[
1
]
=
particle2
;
}
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
int
particle1
,
particle2
;
double
chargeProd1
,
chargeProd2
,
sigma1
,
sigma2
,
epsilon1
,
epsilon2
,
softcoreLJLambda1
,
softcoreLJLambda2
;
force
.
getExceptionParameters
(
group1
,
particle1
,
particle2
,
chargeProd1
,
sigma1
,
epsilon1
,
softcoreLJLambda1
);
force
.
getExceptionParameters
(
group2
,
particle1
,
particle2
,
chargeProd2
,
sigma2
,
epsilon2
,
softcoreLJLambda2
);
return
(
chargeProd1
==
chargeProd2
&&
sigma1
==
sigma2
&&
epsilon1
==
epsilon2
&&
softcoreLJLambda1
==
softcoreLJLambda2
);
}
private:
const
NonbondedSoftcoreForce
&
force
;
};
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::~
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
()
{
if
(
0
&&
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel called.
\n
"
);
(
void
)
fflush
(
data
.
getLog
()
);
}
data
.
decrementKernelCount
();
}
void
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedSoftcoreForce
&
force
)
{
// ---------------------------------------------------------------------------------------
static
const
std
::
string
methodName
=
"CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize"
;
// ---------------------------------------------------------------------------------------
if
(
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"%s called.
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
data
.
getLog
()
);
}
// check forces and relevant parameters
MapStringInt
forceMap
;
getForceMap
(
system
,
forceMap
,
data
.
getLog
()
);
int
softcore
=
0
;
if
(
forceMap
.
find
(
GBSA_OBC_FORCE
)
!=
forceMap
.
end
()
){
setIncludeGBSA
(
true
);
}
if
(
forceMap
.
find
(
GBSA_OBC_SOFTCORE_FORCE
)
!=
forceMap
.
end
()
){
setIncludeGBSA
(
true
);
softcore
++
;
}
if
(
forceMap
.
find
(
GBVI_FORCE
)
!=
forceMap
.
end
()
){
setIncludeGBVI
(
true
);
}
if
(
forceMap
.
find
(
GBVI_SOFTCORE_FORCE
)
!=
forceMap
.
end
()
){
setIncludeGBVI
(
true
);
softcore
++
;
}
if
(
forceMap
.
find
(
NB_SOFTCORE_FORCE
)
!=
forceMap
.
end
()
){
softcore
++
;
}
setIncludeSoftcore
(
softcore
);
numParticles
=
force
.
getNumParticles
();
// Identify which exceptions are 1-4 interactions.
std
::
vector
<
pair
<
int
,
int
>
>
exclusions
;
std
::
vector
<
int
>
exceptions
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
double
chargeProd
,
sigma
,
epsilon
,
softcoreLJLambda
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
chargeProd
,
sigma
,
epsilon
,
softcoreLJLambda
);
exclusions
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
if
(
chargeProd
!=
0.0
||
epsilon
!=
0.0
)
exceptions
.
push_back
(
i
);
}
// Initialize nonbonded interactions.
if
(
numParticles
>
0
){
std
::
vector
<
int
>
particle
(
numParticles
);
std
::
vector
<
float
>
c6
(
numParticles
);
std
::
vector
<
float
>
c12
(
numParticles
);
std
::
vector
<
float
>
q
(
numParticles
);
std
::
vector
<
float
>
softcoreLJLambdaArray
(
numParticles
);
std
::
vector
<
char
>
symbol
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusionList
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
charge
,
radius
,
depth
,
softcoreLJLambda
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
,
softcoreLJLambda
);
particle
[
i
]
=
i
;
q
[
i
]
=
static_cast
<
float
>
(
charge
);
c6
[
i
]
=
static_cast
<
float
>
(
(
4
*
depth
*
pow
(
radius
,
6.0
))
);
c12
[
i
]
=
static_cast
<
float
>
(
(
4
*
depth
*
pow
(
radius
,
12.0
))
);
softcoreLJLambdaArray
[
i
]
=
static_cast
<
float
>
(
softcoreLJLambda
);
exclusionList
[
i
].
push_back
(
i
);
}
for
(
int
i
=
0
;
i
<
(
int
)
exclusions
.
size
();
i
++
)
{
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
freeEnergyGpuSetPeriodicBoxSize
(
data
.
getFreeEnergyGpu
(),
static_cast
<
float
>
(
boxVectors
[
0
][
0
]
),
static_cast
<
float
>
(
boxVectors
[
1
][
1
]
),
static_cast
<
float
>
(
boxVectors
[
2
][
2
]
));
CudaFreeEnergyNonbondedMethod
method
=
FREE_ENERGY_NO_CUTOFF
;
if
(
force
.
getNonbondedMethod
()
!=
NonbondedSoftcoreForce
::
NoCutoff
)
{
method
=
FREE_ENERGY_CUTOFF
;
}
if
(
force
.
getNonbondedMethod
()
==
NonbondedSoftcoreForce
::
CutoffPeriodic
)
{
method
=
FREE_ENERGY_PERIODIC
;
}
// setup parameters
gpuSetNonbondedSoftcoreParameters
(
data
.
getFreeEnergyGpu
(),
138.935485
f
,
particle
,
c6
,
c12
,
q
,
softcoreLJLambdaArray
,
symbol
,
exclusionList
,
method
,
static_cast
<
float
>
(
force
.
getCutoffDistance
()
),
static_cast
<
float
>
(
force
.
getReactionFieldDielectric
()));
}
// Initialize 1-4 nonbonded interactions.
numExceptions
=
exceptions
.
size
();
if
(
numExceptions
>
0
){
std
::
vector
<
int
>
particle1
(
numExceptions
);
std
::
vector
<
int
>
particle2
(
numExceptions
);
std
::
vector
<
float
>
c6
(
numExceptions
);
std
::
vector
<
float
>
c12
(
numExceptions
);
std
::
vector
<
float
>
qProd
(
numExceptions
);
std
::
vector
<
float
>
softcoreLJLambdaArray
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
{
double
charge
,
sig
,
eps
,
softcoreLJLambda
;
force
.
getExceptionParameters
(
exceptions
[
i
],
particle1
[
i
],
particle2
[
i
],
charge
,
sig
,
eps
,
softcoreLJLambda
);
c6
[
i
]
=
static_cast
<
float
>
(
(
4.0
*
eps
*
pow
(
sig
,
6.0
))
);
c12
[
i
]
=
static_cast
<
float
>
(
(
4.0
*
eps
*
pow
(
sig
,
12.0
))
);
qProd
[
i
]
=
static_cast
<
float
>
(
charge
);
softcoreLJLambdaArray
[
i
]
=
static_cast
<
float
>
(
softcoreLJLambda
);
}
gpuSetLJ14SoftcoreParameters
(
data
.
getFreeEnergyGpu
(),
138.935485
f
,
particle1
,
particle2
,
c6
,
c12
,
qProd
,
softcoreLJLambdaArray
);
}
else
if
(
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"Mo nonbonded softcore exceptions.
\n
"
);
(
void
)
fflush
(
data
.
getLog
()
);
}
data
.
getFreeEnergyGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
double
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
){
// ---------------------------------------------------------------------------------------
static
const
std
::
string
methodName
=
"CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces"
;
// ---------------------------------------------------------------------------------------
freeEnergyGpuContext
gpu
=
data
.
getFreeEnergyGpu
();
data
.
initializeGpu
(
);
// calculate nonbonded ixns here, only if implicit solvent is inactive
if
(
!
getIncludeGBSA
()
&&
!
getIncludeGBVI
()
)
{
kCalculateCDLJSoftcoreForces
(
gpu
);
}
// local LJ-14 forces
if
(
getNumExceptions
()
>
0
){
kCalculateLocalSoftcoreForces
(
gpu
);
}
return
0.0
;
}
bool
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
getIncludeGBSA
(
void
)
const
{
return
bIncludeGBSA
;
}
void
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
setIncludeGBSA
(
bool
inputIncludeGBSA
){
bIncludeGBSA
=
inputIncludeGBSA
;
}
bool
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
getIncludeGBVI
(
void
)
const
{
return
bIncludeGBVI
;
}
void
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
setIncludeGBVI
(
bool
inputIncludeGBVI
){
bIncludeGBVI
=
inputIncludeGBVI
;
}
int
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
getIncludeSoftcore
(
void
)
const
{
return
includeSoftcore
;
}
int
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
getNumExceptions
(
void
)
const
{
return
numExceptions
;
}
void
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
setIncludeSoftcore
(
int
inputIncludeSoftcore
){
includeSoftcore
=
inputIncludeSoftcore
;
}
class
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
ForceInfo
(
const
GBSAOBCSoftcoreForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
double
charge1
,
charge2
,
radius1
,
radius2
,
scale1
,
scale2
,
particleNonPolarScalingFactor1
,
particleNonPolarScalingFactor2
;
force
.
getParticleParameters
(
particle1
,
charge1
,
radius1
,
scale1
,
particleNonPolarScalingFactor1
);
force
.
getParticleParameters
(
particle2
,
charge2
,
radius2
,
scale2
,
particleNonPolarScalingFactor2
);
return
(
charge1
==
charge2
&&
radius1
==
radius2
&&
scale1
==
scale2
&&
particleNonPolarScalingFactor1
==
particleNonPolarScalingFactor2
);
}
private:
const
GBSAOBCSoftcoreForce
&
force
;
};
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
::~
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
()
{
if
(
0
&&
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel called.
\n
"
);
(
void
)
fflush
(
data
.
getLog
()
);
}
data
.
decrementKernelCount
();
}
void
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
::
initialize
(
const
System
&
system
,
const
GBSAOBCSoftcoreForce
&
force
)
{
// ---------------------------------------------------------------------------------------
freeEnergyGpuContext
gpu
=
data
.
getFreeEnergyGpu
();
MapStringInt
forceMap
;
getForceMap
(
system
,
forceMap
,
log
);
// check that nonbonded (non-softcore is not active)
if
(
forceMap
.
find
(
NB_FORCE
)
!=
forceMap
.
end
()
){
throw
OpenMMException
(
"Mixing NonbondedForce and GBSAOBCSoftoreForce is not allowed -- use NonbondedSoftcoreForce "
);
}
if
(
forceMap
.
find
(
NB_SOFTCORE_FORCE
)
==
forceMap
.
end
()
){
throw
OpenMMException
(
"NonbondedSoftcore force must be included w/ GBSAOBCSoftcore force."
);
}
int
numParticles
=
system
.
getNumParticles
();
std
::
vector
<
float
>
radius
(
numParticles
);
std
::
vector
<
float
>
scale
(
numParticles
);
std
::
vector
<
float
>
charge
(
numParticles
);
std
::
vector
<
float
>
nonPolarScalingFactors
(
numParticles
);
for
(
int
ii
=
0
;
ii
<
numParticles
;
ii
++
){
double
particleCharge
,
particleRadius
,
scalingFactor
,
particleNonPolarScalingFactor
;
force
.
getParticleParameters
(
ii
,
particleCharge
,
particleRadius
,
scalingFactor
,
particleNonPolarScalingFactor
);
radius
[
ii
]
=
static_cast
<
float
>
(
particleRadius
);
scale
[
ii
]
=
static_cast
<
float
>
(
scalingFactor
);
charge
[
ii
]
=
static_cast
<
float
>
(
particleCharge
);
nonPolarScalingFactors
[
ii
]
=
static_cast
<
float
>
(
particleNonPolarScalingFactor
);
}
gpuSetObcSoftcoreParameters
(
gpu
,
static_cast
<
float
>
(
force
.
getSoluteDielectric
()),
static_cast
<
float
>
(
force
.
getSolventDielectric
()),
static_cast
<
float
>
(
force
.
getNonPolarPrefactor
()),
radius
,
scale
,
charge
,
nonPolarScalingFactors
);
data
.
getFreeEnergyGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
return
;
}
double
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
// ---------------------------------------------------------------------------------------
freeEnergyGpuContext
freeEnergyGpu
=
data
.
getFreeEnergyGpu
();
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
int
call
=
0
;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
data
.
initializeGpu
(
);
// (1) clear Born force array
// (2) calculate Born radii and sum
// (3) loop 1
// (4) sum/calculate Born forces
// (5) loop 2
kClearSoftcoreBornForces
(
gpu
);
kCalculateObcGbsaSoftcoreBornSum
(
freeEnergyGpu
);
kReduceObcGbsaSoftcoreBornSum
(
gpu
);
kCalculateCDLJObcGbsaSoftcoreForces1
(
freeEnergyGpu
);
// sum Born forces and execute second OBC loop
kReduceObcGbsaSoftcoreBornForces
(
gpu
);
kCalculateObcGbsaSoftcoreForces2
(
freeEnergyGpu
);
if
(
data
.
getLog
()
){
kPrintObcGbsaSoftcore
(
freeEnergyGpu
,
"Post kCalculateObcGbsaSoftcoreForces2"
,
call
,
data
.
getLog
()
);
}
return
0.0
;
}
class
CudaFreeEnergyCalcGBVISoftcoreForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
ForceInfo
(
const
GBVISoftcoreForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
double
charge1
,
charge2
,
radius1
,
radius2
,
gamma1
,
gamma2
,
bornRadiusScaleFactor1
,
bornRadiusScaleFactor2
;
force
.
getParticleParameters
(
particle1
,
charge1
,
radius1
,
gamma1
,
bornRadiusScaleFactor1
);
force
.
getParticleParameters
(
particle2
,
charge2
,
radius2
,
gamma2
,
bornRadiusScaleFactor2
);
return
(
charge1
==
charge2
&&
radius1
==
radius2
&&
gamma1
==
gamma2
&&
bornRadiusScaleFactor1
==
bornRadiusScaleFactor2
);
}
private:
const
GBVISoftcoreForce
&
force
;
};
CudaFreeEnergyCalcGBVISoftcoreForceKernel
::~
CudaFreeEnergyCalcGBVISoftcoreForceKernel
()
{
if
(
0
&&
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"~CudaFreeEnergyCalcGBVISoftcoreForceKernel called.
\n
"
);
(
void
)
fflush
(
data
.
getLog
()
);
}
data
.
decrementKernelCount
();
}
void
CudaFreeEnergyCalcGBVISoftcoreForceKernel
::
initialize
(
const
System
&
system
,
const
GBVISoftcoreForce
&
force
,
const
std
::
vector
<
double
>
&
inputScaledRadii
)
{
// ---------------------------------------------------------------------------------------
int
numParticles
=
system
.
getNumParticles
();
freeEnergyGpuContext
gpu
=
data
.
getFreeEnergyGpu
();
// check forces and relevant parameters
MapStringInt
forceMap
;
getForceMap
(
system
,
forceMap
,
log
);
// check that nonbonded (non-softcore is not active)
if
(
forceMap
.
find
(
NB_FORCE
)
!=
forceMap
.
end
()
){
throw
OpenMMException
(
"Mixing NonbondedForce and GBVISoftoreForce not allowed -- use NonbondedSoftcoreForce "
);
}
std
::
vector
<
int
>
particle
(
numParticles
);
std
::
vector
<
float
>
radius
(
numParticles
);
std
::
vector
<
float
>
scaledRadii
(
numParticles
);
std
::
vector
<
float
>
gammas
(
numParticles
);
std
::
vector
<
float
>
bornRadiusScaleFactors
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
){
double
charge
,
particleRadius
,
gamma
,
bornRadiusScaleFactor
;
force
.
getParticleParameters
(
i
,
charge
,
particleRadius
,
gamma
,
bornRadiusScaleFactor
);
particle
[
i
]
=
i
;
radius
[
i
]
=
static_cast
<
float
>
(
particleRadius
);
gammas
[
i
]
=
static_cast
<
float
>
(
gamma
);
scaledRadii
[
i
]
=
static_cast
<
float
>
(
inputScaledRadii
[
i
]
);
bornRadiusScaleFactors
[
i
]
=
static_cast
<
float
>
(
bornRadiusScaleFactor
);
}
std
::
vector
<
float
>
quinticSplineParameters
;
if
(
force
.
getBornRadiusScalingMethod
()
==
GBVISoftcoreForce
::
QuinticSpline
){
// quintic spline
quinticSplineParameters
.
resize
(
2
);
quinticSplineParameters
[
0
]
=
static_cast
<
float
>
(
force
.
getQuinticLowerLimitFactor
());
quinticSplineParameters
[
1
]
=
static_cast
<
float
>
(
force
.
getQuinticUpperBornRadiusLimit
());
quinticSplineParameters
[
1
]
=
powf
(
quinticSplineParameters
[
1
],
-
3.0
f
);
quinticScaling
=
1
;
}
// load parameters onto board
// defined in kCalculateGBVISoftcore.cu
gpuSetGBVISoftcoreParameters
(
gpu
,
static_cast
<
float
>
(
force
.
getSoluteDielectric
()
),
static_cast
<
float
>
(
force
.
getSolventDielectric
()
),
particle
,
radius
,
gammas
,
scaledRadii
,
bornRadiusScaleFactors
,
quinticSplineParameters
);
data
.
getFreeEnergyGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
return
;
}
double
CudaFreeEnergyCalcGBVISoftcoreForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
freeEnergyGpuContext
freeEnergyGpu
=
data
.
getFreeEnergyGpu
();
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
data
.
initializeGpu
(
);
// (1) clear Born force array
// (2) calculate Born radii and sum
// (3) loop 1
// (4) sum/calculate Born forces
// (5) loop 2
// calculate Born radii and first loop of GB/VI forces
kClearSoftcoreBornForces
(
gpu
);
kCalculateGBVISoftcoreBornSum
(
freeEnergyGpu
);
if
(
quinticScaling
){
kReduceGBVIBornSumQuinticScaling
(
freeEnergyGpu
);
}
else
{
kReduceGBVISoftcoreBornSum
(
freeEnergyGpu
);
}
kCalculateCDLJObcGbsaSoftcoreForces1
(
freeEnergyGpu
);
if
(
quinticScaling
){
kReduceGBVIBornForcesQuinticScaling
(
freeEnergyGpu
);
}
else
{
kReduceGBVISoftcoreBornForces
(
freeEnergyGpu
);
}
// second loop of GB/VI forces
kCalculateGBVISoftcoreForces2
(
freeEnergyGpu
);
if
(
data
.
getLog
()
){
kPrintGBVISoftcore
(
freeEnergyGpu
,
"Post kCalculateGBVISoftcoreForces2"
,
0
,
data
.
getLog
()
);
}
return
0.0
;
}
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.h
deleted
100644 → 0
View file @
c775bd19
#ifndef OPENMM_FREE_ENERGY_CUDA_KERNELS_H_
#define OPENMM_FREE_ENERGY_CUDA_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
#include "openmm/kernels.h"
#include "kernels/gputypes.h"
#include "openmm/System.h"
#include "OpenMMFreeEnergy.h"
#include "openmm/freeEnergyKernels.h"
#include "FreeEnergyCudaData.h"
namespace
OpenMM
{
/**
* This kernel is invoked by NonbondedSoftcoreForce to calculate the forces acting on the system.
*/
class
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
:
public
CalcNonbondedSoftcoreForceKernel
{
public:
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
FreeEnergyCudaData
&
data
,
System
&
system
)
:
CalcNonbondedSoftcoreForceKernel
(
name
,
platform
),
data
(
data
),
system
(
system
)
{
numExceptions
=
0
;
numParticles
=
0
;
bIncludeGBSA
=
false
;
bIncludeGBVI
=
false
;
includeSoftcore
=
false
;
log
=
NULL
;
data
.
incrementKernelCount
();
}
~
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the NonbondedForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NonbondedSoftcoreForce
&
force
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Get flag signalling whether GBSA/OBC force is included
*
* @return flag
*/
bool
getIncludeGBSA
(
void
)
const
;
/**
* Set flag signalling whether GBSA/OBC force is included
*
* @param inputIncludeGBSA input flag value
*/
void
setIncludeGBSA
(
bool
inputIncludeGBSA
);
/**
* Get flag signalling whether GB/VI force is included
*
* @return flag
*/
bool
getIncludeGBVI
(
void
)
const
;
/**
* Set flag signalling whether GB/VI force is included
*
* @param inputIncludeGBVI input flag value
*/
void
setIncludeGBVI
(
bool
inputIncludeGBVI
);
/**
* Get flag signalling whether softcore force is included
*
* @return flag
*/
int
getIncludeSoftcore
(
void
)
const
;
/**
* Set flag signalling whether GB/VI force is included
*
* @param inputIncludeGBVI input flag value
*/
void
setIncludeSoftcore
(
int
inputSoftcore
);
/**
* Get number of exceptions
*
* @return number of exceptions
*/
int
getNumExceptions
(
void
)
const
;
private:
FreeEnergyCudaData
&
data
;
class
ForceInfo
;
int
numParticles
;
System
&
system
;
bool
bIncludeGBSA
;
bool
bIncludeGBVI
;
int
includeSoftcore
;
int
numExceptions
;
FILE
*
log
;
};
/**
* This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
*/
class
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
:
public
CalcGBSAOBCSoftcoreForceKernel
{
public:
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
FreeEnergyCudaData
&
data
)
:
CalcGBSAOBCSoftcoreForceKernel
(
name
,
platform
),
data
(
data
)
{
log
=
NULL
;
data
.
incrementKernelCount
();
}
~
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBSAOBCForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
GBSAOBCSoftcoreForce
&
force
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
private:
FreeEnergyCudaData
&
data
;
class
ForceInfo
;
FILE
*
log
;
};
/**
* This kernel is invoked by GBVIForce to calculate the forces acting on the system.
*/
class
CudaFreeEnergyCalcGBVISoftcoreForceKernel
:
public
CalcGBVISoftcoreForceKernel
{
public:
CudaFreeEnergyCalcGBVISoftcoreForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
FreeEnergyCudaData
&
data
)
:
CalcGBVISoftcoreForceKernel
(
name
,
platform
),
data
(
data
)
{
log
=
NULL
;
quinticScaling
=
0
;
data
.
incrementKernelCount
();
}
~
CudaFreeEnergyCalcGBVISoftcoreForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBVIForce this kernel will be used for
* @param scaledRadii the scaled radii (Eq. 5 of Labute paper)
*/
void
initialize
(
const
System
&
system
,
const
GBVISoftcoreForce
&
force
,
const
std
::
vector
<
double
>
&
scaledRadii
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
private:
FreeEnergyCudaData
&
data
;
class
ForceInfo
;
FILE
*
log
;
int
quinticScaling
;
};
}
// namespace OpenMM
#endif
/*OPENMM_FREE_ENERGY_CUDA_KERNELS_H_*/
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.cpp
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMMFreeEnergy *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "FreeEnergyCudaData.h"
#include "openmm/OpenMMException.h"
#include <sstream>
extern
"C"
void
removeFreeEnergyCudaDataFromContextMap
(
void
*
context
);
namespace
OpenMM
{
FreeEnergyCudaData
::
FreeEnergyCudaData
(
CudaPlatform
::
PlatformData
&
data
)
:
cudaPlatformData
(
data
)
{
kernelCount
=
0
;
freeEnergyGpu
=
freeEnergyGpuInit
(
cudaPlatformData
.
gpu
);
log
=
NULL
;
contextImpl
=
NULL
;
gpuInitialized
=
false
;
boxDimensions
[
0
]
=
0.0
;
boxDimensions
[
1
]
=
0.0
;
boxDimensions
[
2
]
=
0.0
;
}
FreeEnergyCudaData
::~
FreeEnergyCudaData
()
{
if
(
getLog
()
){
(
void
)
fprintf
(
getLog
(),
"~FreeEnergyCudaData called kernelCount=%d
\n
"
,
kernelCount
);
(
void
)
fflush
(
getLog
()
);
}
freeEnergyGpuShutDown
(
freeEnergyGpu
);
}
void
FreeEnergyCudaData
::
decrementKernelCount
(
void
)
{
kernelCount
--
;
if
(
getLog
()
){
(
void
)
fprintf
(
getLog
(),
"~reeEnergyCudaData decrementKernelCount called. %d
\n
"
,
kernelCount
);
(
void
)
fflush
(
getLog
()
);
}
if
(
kernelCount
==
0
&&
contextImpl
!=
NULL
){
removeFreeEnergyCudaDataFromContextMap
(
contextImpl
);
freeEnergyGpuShutDown
(
freeEnergyGpu
);
}
}
void
FreeEnergyCudaData
::
incrementKernelCount
(
void
)
{
kernelCount
++
;
}
freeEnergyGpuContext
FreeEnergyCudaData
::
getFreeEnergyGpu
(
void
)
const
{
return
freeEnergyGpu
;
}
void
FreeEnergyCudaData
::
setLog
(
FILE
*
inputLog
)
{
log
=
inputLog
;
freeEnergyGpu
->
log
=
inputLog
;
}
FILE
*
FreeEnergyCudaData
::
getLog
(
void
)
const
{
return
log
;
}
void
FreeEnergyCudaData
::
setContextImpl
(
void
*
inputContextImpl
)
{
contextImpl
=
inputContextImpl
;
}
void
FreeEnergyCudaData
::
initializeGpu
(
void
)
{
if
(
!
gpuInitialized
){
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
if
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedCutoff
!=
gpu
->
sim
.
nonbondedCutoff
){
std
::
stringstream
msg
;
msg
<<
"The softcore non-bonded cutoff="
<<
freeEnergyGpu
->
freeEnergySim
.
nonbondedCutoff
;
msg
<<
"does not agree with the non-softcore cutoff= "
<<
gpu
->
sim
.
nonbondedCutoff
;
throw
OpenMM
::
OpenMMException
(
msg
.
str
()
);
}
/*
freeEnergyGpuBuildOutputBuffers( freeEnergyGpu, getHasFreeEnergyGeneralizedKirkwood() );
freeEnergyGpuBuildThreadBlockWorkList( freeEnergyGpu );
boxDimensions[0] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeX;
boxDimensions[1] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeY;
boxDimensions[2] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ;
*/
gpuBuildExclusionList
(
gpu
);
gpuSetConstants
(
gpu
);
freeEnergyGpuSetConstants
(
freeEnergyGpu
);
gpuInitialized
=
true
;
if
(
log
){
//gpuPrintCudaFreeEnergyGmxSimulation( freeEnergyGpu, getLog() );
(
void
)
fprintf
(
log
,
"FreeEnergyCudaGpu initialized kernelCount=%d
\n
"
,
kernelCount
);
(
void
)
fflush
(
log
);
}
}
else
{
/*
if( boxDimensions[0] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeX ||
boxDimensions[1] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeY ||
boxDimensions[2] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ ){
freeEnergyGpuSetConstants( freeEnergyGpu, 1 );
boxDimensions[0] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeX;
boxDimensions[1] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeY;
boxDimensions[2] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ;
}
*/
}
return
;
}
}
plugins/freeEnergy/platforms/cuda/src/FreeEnergyCudaData.h
deleted
100644 → 0
View file @
c775bd19
#ifndef FREE_ENERGY_CUDA_DATA_H_
#define FREE_ENERGY_CUDA_DATA_H_
/* -------------------------------------------------------------------------- *
* OpenMMFreeEnergy *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
#include "kernels/freeEnergyGpuTypes.h"
#include "kernels/cudaKernels.h"
#include "openmm/KernelImpl.h"
namespace
OpenMM
{
/**
* Free energy Cuda data
*/
class
FreeEnergyCudaData
{
public:
FreeEnergyCudaData
(
CudaPlatform
::
PlatformData
&
data
);
~
FreeEnergyCudaData
();
/**
* Increment kernel count
*
*/
void
incrementKernelCount
(
void
);
/**
* Decrement kernel count
*
*/
void
decrementKernelCount
(
void
);
/**
* Return freeEnergyGpuContext context
*
* @return freeEnergyGpuContext
*/
freeEnergyGpuContext
OPENMMCUDA_EXPORT
getFreeEnergyGpu
(
void
)
const
;
/**
* Set log file reference
*
* @param log file reference; if not set, then no logging
*/
void
setLog
(
FILE
*
inputLog
);
/**
* Get log file reference
*
* @return log file reference
*/
FILE
*
getLog
(
void
)
const
;
/**
* if gpuInitialized is false, write data to board
*
* @param log file reference; if not set, then no logging
*/
void
initializeGpu
(
void
);
/**
* Set contextImpl
*
* @param contextImpl reference
*/
void
setContextImpl
(
void
*
contextImpl
);
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
private:
freeEnergyGpuContext
freeEnergyGpu
;
unsigned
int
kernelCount
;
void
*
contextImpl
;
FILE
*
log
;
bool
gpuInitialized
;
double
boxDimensions
[
3
];
};
}
// namespace OpenMM
#endif
/*FREE_ENERGY_CUDA_DATA_H_*/
plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
deleted
100755 → 0
View file @
c775bd19
#ifndef __GPU_FREE_ENERGY_KERNELS_H__
#define __GPU_FREE_ENERGY_KERNELS_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
#include "cudatypes.h"
#include <vector>
#include <cuda.h>
// Function prototypes
// CDLJ softcore
// setup methods called from CudaFreeEnergyKernels
// nonbonded and 1-4 ixns
extern
"C"
bool
gpuIsAvailableSoftcore
();
extern
"C"
void
freeEnergyGpuSetPeriodicBoxSize
(
freeEnergyGpuContext
gpu
,
float
xsize
,
float
ysize
,
float
zsize
);
extern
"C"
void
gpuSetNonbondedSoftcoreParameters
(
freeEnergyGpuContext
gpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q
,
const
std
::
vector
<
float
>&
softcoreLJLambdaArray
,
const
std
::
vector
<
char
>&
symbol
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusions
,
CudaFreeEnergyNonbondedMethod
method
,
float
cutoffDistance
,
float
reactionFieldDielectric
);
extern
"C"
void
gpuSetLJ14SoftcoreParameters
(
freeEnergyGpuContext
gpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
qProd
,
const
std
::
vector
<
float
>&
softcoreLJLambdaArray
);
// write address's to device
extern
"C"
void
SetCalculateCDLJSoftcoreGpuSim
(
freeEnergyGpuContext
gpu
);
extern
"C"
void
SetCalculateLocalSoftcoreGpuSim
(
freeEnergyGpuContext
gpu
);
// kernel calls to device
extern
"C"
void
kCalculateCDLJSoftcoreForces
(
freeEnergyGpuContext
gpu
);
extern
"C"
void
kCalculateLocalSoftcoreForces
(
freeEnergyGpuContext
gpu
);
// GB/VI softcore
// setup method called from CudaFreeEnergyKernels
extern
"C"
void
gpuSetGBVISoftcoreParameters
(
freeEnergyGpuContext
gpu
,
float
innerDielectric
,
float
solventDielectric
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
gamma
,
const
std
::
vector
<
float
>&
scaledRadii
,
const
std
::
vector
<
float
>&
bornRadiusScaleFactors
,
const
std
::
vector
<
float
>&
quinticSplineParameters
);
// write address's to device
extern
"C"
void
SetCalculateGBVISoftcoreForcesSim
(
gpuContext
gpu
,
float
*
softCoreLJLambda
);
extern
"C"
void
SetCalculateGBVISoftcoreBornSumGpuSim
(
freeEnergyGpuContext
gpu
);
extern
"C"
void
SetCalculateGBVISoftcoreForces2Sim
(
freeEnergyGpuContext
gpu
);
// kernel calls to device
extern
void
kReduceGBVIBornSumQuinticScaling
(
freeEnergyGpuContext
gpu
);
extern
void
kCalculateGBVISoftcoreBornSum
(
freeEnergyGpuContext
gpu
);
extern
void
kReduceGBVIBornForcesQuinticScaling
(
freeEnergyGpuContext
gpu
);
extern
void
kCalculateGBVISoftcoreForces2
(
freeEnergyGpuContext
gpu
);
extern
void
kReduceGBVISoftcoreBornForces
(
freeEnergyGpuContext
gpu
);
extern
void
kReduceGBVISoftcoreBornSum
(
freeEnergyGpuContext
gpu
);
extern
void
kPrintGBVISoftcore
(
freeEnergyGpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
);
extern
void
kClearSoftcoreBornForces
(
gpuContext
gpu
);
// Obc softcore
// setup method called from CudaFreeEnergyKernels
/**
* Initialize parameters for Cuda Obc softcore
*
* @param gpu gpu context
* @param innerDielectric solute dielectric
* @param solventDielectric solvent dielectric
* @param radius intrinsic Born radii
* @param scale Obc scaling factors
* @param charge atomic charges (possibly overwritten by other methods?)
* @param nonPolarScalingFactors non-polar scaling factors
*
*/
extern
"C"
void
gpuSetObcSoftcoreParameters
(
freeEnergyGpuContext
gpu
,
float
innerDielectric
,
float
solventDielectric
,
float
nonPolarPrefactor
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
scale
,
const
std
::
vector
<
float
>&
charge
,
const
std
::
vector
<
float
>&
nonPolarScalingFactors
);
// write address's to device
extern
"C"
void
SetCalculateObcGbsaSoftcoreBornSumSim
(
freeEnergyGpuContext
gpu
);
// this method and kCalculateObcGbsaSoftcoreForces2() are being
// used until changes in OpenMM version are made
extern
"C"
void
SetCalculateObcGbsaSoftcoreForces2Sim
(
freeEnergyGpuContext
gpu
);
// kernel calls to device
extern
void
kClearObcGbsaSoftcoreBornSum
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaSoftcoreBornForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaSoftcoreBornSum
(
freeEnergyGpuContext
gpu
);
extern
void
kReduceObcGbsaSoftcoreBornSum
(
gpuContext
gpu
);
// this method is not needed; the OpenMM version can be used
extern
void
kCalculateObcGbsaSoftcoreForces2
(
freeEnergyGpuContext
gpu
);
extern
void
kPrintObcGbsaSoftcore
(
freeEnergyGpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
);
// shared
extern
"C"
void
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
(
freeEnergyGpuContext
gpu
);
extern
void
kCalculateCDLJObcGbsaSoftcoreForces1
(
freeEnergyGpuContext
gpu
);
extern
"C"
void
showWorkUnitsFreeEnergy
(
freeEnergyGpuContext
freeEnergyGpu
,
int
interactingWorkUnit
);
#endif //__GPU_FREE_ENERGY_KERNELS_H__
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaGpu.cpp
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMMFreeEnergy *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman, Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES
/* M_PI */
#endif
#define PARAMETER_PRINT 1
#define MAX_PARAMETER_PRINT 10
#include "openmm/OpenMMException.h"
#include "cudaKernels.h"
#include "GpuFreeEnergyCudaKernels.h"
#include "freeEnergyGpuTypes.h"
// for some reason, these are not being included w/ cudaKernels.h on Windows
//extern void OPENMMCUDA_EXPORT SetCalculateObcGbsaForces2Sim(gpuContext gpu);
extern
void
OPENMMCUDA_EXPORT
SetForcesSim
(
gpuContext
gpu
);
#include <cmath>
#include <iostream>
#include <sstream>
#include <limits>
#include <cstring>
#include <vector>
#include <stdio.h>
#ifdef WIN32
#include <windows.h>
#else
#include <sys/time.h>
#endif
using
std
::
vector
;
extern
"C"
freeEnergyGpuContext
freeEnergyGpuInit
(
_gpuContext
*
gpu
){
// allocate and zero block
freeEnergyGpuContext
freeEnergyGpu
=
new
_freeEnergyGpuContext
;
memset
(
freeEnergyGpu
,
0
,
sizeof
(
struct
_freeEnergyGpuContext
)
);
freeEnergyGpu
->
gpuContext
=
gpu
;
return
freeEnergyGpu
;
}
extern
"C"
void
gpuPrintCudaStream
(
std
::
string
name
,
unsigned
int
length
,
unsigned
int
subStreams
,
unsigned
int
stride
,
unsigned
int
memoryFootprint
,
void
*
pSysStream
,
void
*
pDevStream
,
void
*
pSysData
,
void
*
pDevData
,
FILE
*
log
)
{
(
void
)
fprintf
(
log
,
" %-35s [%8u %5u %8u %8u] Stream[%p %p] Data[%16p %16p]
\n
"
,
name
.
c_str
(),
length
,
subStreams
,
stride
,
memoryFootprint
,
pSysStream
,
pDevStream
,
pSysData
,
pDevData
);
}
extern
"C"
int
gpuPrintCudaStreamFloat
(
CUDAStream
<
float
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float
);
}
extern
"C"
int
gpuPrintCudaStreamFloat2
(
CUDAStream
<
float2
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float2
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
2
*
sizeof
(
float
);
}
extern
"C"
int
gpuPrintCudaStreamFloat4
(
CUDAStream
<
float4
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float4
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
4
*
sizeof
(
float
);
}
extern
"C"
int
gpuPrintCudaStreamUnsignedInt
(
CUDAStream
<
unsigned
int
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
unsigned
int
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
unsigned
int
);
}
extern
"C"
int
gpuPrintCudaStreamInt
(
CUDAStream
<
int
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int
);
}
extern
"C"
int
gpuPrintCudaStreamInt2
(
CUDAStream
<
int2
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int2
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
2
*
sizeof
(
int
);
}
extern
"C"
int
gpuPrintCudaStreamInt4
(
CUDAStream
<
int4
>*
cUDAStream
,
FILE
*
log
)
{
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int4
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
4
*
sizeof
(
int
);
}
extern
"C"
void
gpuPrintCudaFreeEnergyGmxSimulation
(
freeEnergyGpuContext
freeEnergyGpu
,
FILE
*
log
)
{
if
(
log
==
NULL
)
return
;
_gpuContext
*
gpu
=
freeEnergyGpu
->
gpuContext
;
int
totalMemory
=
0
;
(
void
)
fprintf
(
log
,
"cudaFreeEnergyGmxSimulation:
\n\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" numberOfAtoms %u
\n
"
,
gpu
->
natoms
);
(
void
)
fprintf
(
log
,
" paddedNumberOfAtoms %u
\n
"
,
gpu
->
sim
.
paddedNumberOfAtoms
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
" gpuContext %p
\n
"
,
freeEnergyGpu
->
gpuContext
);
(
void
)
fprintf
(
log
,
" log %p %s
\n
"
,
freeEnergyGpu
->
log
,
freeEnergyGpu
->
log
==
stderr
?
"is stderr"
:
"is not stderr"
);
(
void
)
fprintf
(
log
,
" sm_version %u
\n
"
,
gpu
->
sm_version
);
(
void
)
fprintf
(
log
,
" device %u
\n
"
,
gpu
->
device
);
(
void
)
fprintf
(
log
,
" sharedMemoryPerBlock %u
\n
"
,
gpu
->
sharedMemoryPerBlock
);
(
void
)
fprintf
(
log
,
" bOutputBufferPerWarp %d
\n
"
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
log
,
" blocks %u
\n
"
,
gpu
->
sim
.
blocks
);
(
void
)
fprintf
(
log
,
" threads_per_block %u
\n
"
,
gpu
->
sim
.
threads_per_block
);
(
void
)
fprintf
(
log
,
" update_threads_per_block %u
\n
"
,
gpu
->
sim
.
update_threads_per_block
);
(
void
)
fprintf
(
log
,
" nonbondBlocks %u
\n
"
,
gpu
->
sim
.
nonbond_blocks
);
(
void
)
fprintf
(
log
,
" nonbondThreadsPerBlock %u
\n
"
,
gpu
->
sim
.
nonbond_threads_per_block
);
(
void
)
fprintf
(
log
,
" bsf_reduce_threads_per_block %u
\n
"
,
gpu
->
sim
.
bsf_reduce_threads_per_block
);
(
void
)
fprintf
(
log
,
" nonbondOutputBuffers %u
\n
"
,
gpu
->
sim
.
nonbondOutputBuffers
);
(
void
)
fprintf
(
log
,
" outputBuffers %u
\n
"
,
gpu
->
sim
.
outputBuffers
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
freeEnergyGpu
->
gpuContext
->
psEnergy
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
freeEnergyGpu
->
gpuContext
->
psForce4
,
log
);
(
void
)
fflush
(
log
);
}
extern
"C"
void
freeEnergyGpuShutDown
(
freeEnergyGpuContext
freeEnergyGpu
){
if
(
freeEnergyGpu
->
log
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"freeEnergyGpuShutDown called.
\n
"
);
(
void
)
fflush
(
freeEnergyGpu
->
log
);
}
// free free energy Cuda arrays
delete
freeEnergyGpu
->
psLJ14ID
;
delete
freeEnergyGpu
->
psLJ14Parameter
;
delete
freeEnergyGpu
->
psSigEps4
;
delete
freeEnergyGpu
->
psSwitchDerivative
;
delete
freeEnergyGpu
->
psNonPolarScalingFactors
;
delete
freeEnergyGpu
;
return
;
}
extern
"C"
void
freeEnergyGpuSetConstants
(
freeEnergyGpuContext
freeEnergyGpu
){
if
(
freeEnergyGpu
->
log
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"FreeEnergyGpuSetConstants called
\n
"
);
(
void
)
fflush
(
freeEnergyGpu
->
log
);
}
SetCalculateLocalSoftcoreGpuSim
(
freeEnergyGpu
);
SetCalculateCDLJSoftcoreGpuSim
(
freeEnergyGpu
);
SetCalculateGBVISoftcoreBornSumGpuSim
(
freeEnergyGpu
);
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
(
freeEnergyGpu
);
SetCalculateGBVISoftcoreForces2Sim
(
freeEnergyGpu
);
SetCalculateObcGbsaSoftcoreBornSumSim
(
freeEnergyGpu
);
SetCalculateObcGbsaSoftcoreForces2Sim
(
freeEnergyGpu
);
}
extern
"C"
void
freeEnergyGpuSetPeriodicBoxSize
(
freeEnergyGpuContext
freeEnergyGpu
,
float
xsize
,
float
ysize
,
float
zsize
)
{
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeX
=
xsize
;
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeY
=
ysize
;
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeZ
=
zsize
;
freeEnergyGpu
->
freeEnergySim
.
invPeriodicBoxSizeX
=
1.0
f
/
xsize
;
freeEnergyGpu
->
freeEnergySim
.
invPeriodicBoxSizeY
=
1.0
f
/
ysize
;
freeEnergyGpu
->
freeEnergySim
.
invPeriodicBoxSizeZ
=
1.0
f
/
zsize
;
freeEnergyGpu
->
freeEnergySim
.
recipBoxSizeX
=
2.0
f
*
PI
/
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeX
;
freeEnergyGpu
->
freeEnergySim
.
recipBoxSizeY
=
2.0
f
*
PI
/
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeY
;
freeEnergyGpu
->
freeEnergySim
.
recipBoxSizeZ
=
2.0
f
*
PI
/
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeZ
;
freeEnergyGpu
->
freeEnergySim
.
cellVolume
=
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeX
*
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeY
*
freeEnergyGpu
->
freeEnergySim
.
periodicBoxSizeZ
;
gpuSetPeriodicBoxSize
(
freeEnergyGpu
->
gpuContext
,
xsize
,
ysize
,
zsize
);
}
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable: 4297)
#endif
extern
"C"
void
gpuSetNonbondedSoftcoreParameters
(
freeEnergyGpuContext
freeEnergyGpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q
,
const
std
::
vector
<
float
>&
softcoreLJLambdaArray
,
const
std
::
vector
<
char
>&
symbol
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusions
,
CudaFreeEnergyNonbondedMethod
method
,
float
cutoffDistance
,
float
solventDielectric
)
{
unsigned
int
numberOfParticles
=
c6
.
size
();
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
int
paddedNumberOfAtoms
=
gpu
->
sim
.
paddedNumberOfAtoms
;
// sanity checks
if
(
paddedNumberOfAtoms
<
1
){
std
::
stringstream
msg
;
msg
<<
"gpuSetNonbondedSoftcoreParameters: number of padded atoms="
<<
gpu
->
sim
.
paddedNumberOfAtoms
<<
" is less than 1."
;
throw
OpenMM
::
OpenMMException
(
msg
.
str
()
);
}
if
(
freeEnergyGpu
->
gpuContext
->
sim
.
atoms
!=
numberOfParticles
){
std
::
stringstream
msg
;
msg
<<
"gpuSetNonbondedSoftcoreParameters: number of atoms in gpuContext does not match input count: "
<<
freeEnergyGpu
->
gpuContext
->
sim
.
atoms
<<
" "
<<
numberOfParticles
<<
"."
;
throw
OpenMM
::
OpenMMException
(
msg
.
str
()
);
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
freeEnergyGpu
->
freeEnergySim
.
epsfac
=
epsfac
;
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
=
method
;
freeEnergyGpu
->
freeEnergySim
.
nonbondedCutoff
=
cutoffDistance
;
freeEnergyGpu
->
freeEnergySim
.
nonbondedCutoffSqr
=
cutoffDistance
*
cutoffDistance
;
gpu
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
gpu
->
sim
.
nonbondedCutoffSqr
=
cutoffDistance
*
cutoffDistance
;
if
(
cutoffDistance
>
0.0
f
){
freeEnergyGpu
->
freeEnergySim
.
reactionFieldK
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
f
)
/
(
2.0
f
*
solventDielectric
+
1.0
f
);
freeEnergyGpu
->
freeEnergySim
.
reactionFieldC
=
(
1.0
f
/
cutoffDistance
)
*
(
3.0
f
*
solventDielectric
)
/
(
2.0
f
*
solventDielectric
+
1.0
f
);
gpu
->
sim
.
reactionFieldK
=
freeEnergyGpu
->
freeEnergySim
.
reactionFieldK
;
gpu
->
sim
.
reactionFieldC
=
freeEnergyGpu
->
freeEnergySim
.
reactionFieldC
;
}
else
{
freeEnergyGpu
->
freeEnergySim
.
reactionFieldK
=
0.0
f
;
freeEnergyGpu
->
freeEnergySim
.
reactionFieldC
=
0.0
f
;
}
setExclusions
(
gpu
,
exclusions
);
// parameters
freeEnergyGpu
->
psSigEps4
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"freeEnergyGpuSigEps4"
);
freeEnergyGpu
->
freeEnergySim
.
pSigEps4
=
freeEnergyGpu
->
psSigEps4
->
_pDevData
;
for
(
unsigned
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
float
p1
=
0.5
f
;
float
p2
=
0.0
f
;
if
(
(
c6
[
ii
]
>
0.0
f
)
&&
(
c12
[
ii
]
>
0.0
f
)
){
p1
=
0.5
f
*
powf
(
c12
[
ii
]
/
c6
[
ii
],
1.0
f
/
6.0
f
);
p2
=
c6
[
ii
]
*
sqrtf
(
1.0
f
/
c12
[
ii
]);
}
/*
if (symbol.size() > 0)
freeEnergyGpu->pAtomSymbol[ii] = symbol[ii];
*/
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
x
=
p1
;
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
y
=
p2
;
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
z
=
softcoreLJLambdaArray
[
ii
];
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
w
=
q
[
ii
];
}
// Dummy out extra atom data
for
(
int
ii
=
numberOfParticles
;
ii
<
paddedNumberOfAtoms
;
ii
++
){
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
x
=
1.0
f
;
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
y
=
0.0
f
;
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
z
=
0.0
f
;
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
w
=
0.0
f
;
(
*
gpu
->
psPosq4
)[
ii
].
x
=
100000.0
f
+
ii
*
10.0
f
;
(
*
gpu
->
psPosq4
)[
ii
].
y
=
100000.0
f
+
ii
*
10.0
f
;
(
*
gpu
->
psPosq4
)[
ii
].
z
=
100000.0
f
+
ii
*
10.0
f
;
(
*
gpu
->
psPosq4
)[
ii
].
w
=
0.0
f
;
}
if
(
freeEnergyGpu
->
log
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"freeEnergyGpuSetNonbondedSoftcoreParameters: %5u padded=%u epsfac=%14.7e method=%d cutoffDistance=%9.2f solventDielectric=%9.2f
\n
"
,
numberOfParticles
,
freeEnergyGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
,
epsfac
,
method
,
cutoffDistance
,
solventDielectric
);
#ifdef PARAMETER_PRINT
int
maxPrint
=
MAX_PARAMETER_PRINT
;
for
(
unsigned
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%6u sig[%14.7e %14.7e] lambda=%10.3f q=%10.3f
\n
"
,
ii
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
x
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
y
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
z
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
w
);
if
(
ii
==
maxPrint
&&
ii
<
freeEnergyGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
-
maxPrint
){
ii
=
numberOfParticles
-
maxPrint
;
}
}
unsigned
int
offset
=
paddedNumberOfAtoms
-
maxPrint
;
if
(
offset
>
0
){
if
(
offset
>
numberOfParticles
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"Dummy padded entries
\n
"
);
for
(
int
ii
=
offset
;
ii
<
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%6u sig[%14.7e %14.7e] lambda=%10.3f q=%10.3f
\n
"
,
ii
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
x
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
y
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
z
,
(
*
freeEnergyGpu
->
psSigEps4
)[
ii
].
w
);
}
}
}
#endif
(
void
)
fflush
(
freeEnergyGpu
->
log
);
}
// upload data to board
freeEnergyGpu
->
psSigEps4
->
Upload
();
gpu
->
psPosq4
->
Upload
();
return
;
}
/**---------------------------------------------------------------------------------------
Get threads/block
@param amoebaGpu amoebaGpuContext
@param sharedMemoryPerThread shared memory/thread
@param sharedMemoryPerBlock shared memory/block
@return threadsPerBlock
--------------------------------------------------------------------------------------- */
extern
"C"
unsigned
int
getThreadsPerBlockFEP
(
freeEnergyGpuContext
freeEnergyGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
)
{
unsigned
int
grid
=
freeEnergyGpu
->
gpuContext
->
grid
;
unsigned
int
threadsPerBlock
=
(
sharedMemoryPerBlock
+
grid
-
1
)
/
(
grid
*
sharedMemoryPerThread
);
threadsPerBlock
=
threadsPerBlock
<
1
?
1
:
threadsPerBlock
;
threadsPerBlock
*=
grid
;
return
threadsPerBlock
;
}
static
int
decodeCell
(
int
cellCode
,
unsigned
int
*
x
,
unsigned
int
*
y
,
unsigned
int
*
exclusion
){
*
x
=
cellCode
>>
17
;
*
y
=
(
cellCode
>>
2
)
&
0x7FFF
;
*
exclusion
=
(
cellCode
&
1
)
?
1
:
0
;
return
0
;
}
void
showWorkUnitsFreeEnergy
(
freeEnergyGpuContext
freeEnergyGpu
,
int
interactingWorkUnit
){
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
gpu
->
psWorkUnit
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
unsigned
int
totalWarps
=
(
gpu
->
sim
.
nonbond_blocks
*
gpu
->
sim
.
nonbond_threads_per_block
)
/
GRID
;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned
int
numWorkUnits
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
(
void
)
fprintf
(
stderr
,
"Total warps=%u blocks=%u threads=%u GRID=%u wus=%u warpOutput=%d
\n
"
,
totalWarps
,
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
GRID
,
numWorkUnits
,
gpu
->
bOutputBufferPerWarp
);
unsigned
int
maxPrint
=
3
;
std
::
stringstream
message
;
char
buffer
[
2048
];
unsigned
int
targetAtom
=
18
;
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
sim
.
nonbond_blocks
;
ii
++
)
{
unsigned
int
blockId
=
ii
;
for
(
unsigned
int
jj
=
0
;
jj
<
gpu
->
sim
.
nonbond_threads_per_block
;
jj
++
)
{
unsigned
int
warp
=
(
ii
*
gpu
->
sim
.
nonbond_threads_per_block
+
jj
)
/
GRID
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
print
=
0
;
while
(
pos
<
end
){
unsigned
int
x
,
y
,
exclusion
,
flags
;
int
flagInt
;
if
(
interactingWorkUnit
){
decodeCell
(
gpu
->
psInteractingWorkUnit
->
_pSysData
[
pos
],
&
x
,
&
y
,
&
exclusion
);
flags
=
gpu
->
psInteractionFlag
->
_pSysData
[
pos
];
if
(
flags
==
0xFFFFFFFF
){
flagInt
=
-
2
;
}
else
{
flagInt
=
flags
;
}
}
else
{
decodeCell
(
gpu
->
psWorkUnit
->
_pSysData
[
pos
],
&
x
,
&
y
,
&
exclusion
);
flagInt
=
-
1
;
}
x
*=
GRID
;
y
*=
GRID
;
unsigned
int
bufferX
=
gpu
->
bOutputBufferPerWarp
?
warp
*
gpu
->
sim
.
stride
:
(
x
>>
GRIDBITS
)
*
gpu
->
sim
.
stride
;
unsigned
int
bufferY
=
gpu
->
bOutputBufferPerWarp
?
warp
*
gpu
->
sim
.
stride
:
(
y
>>
GRIDBITS
)
*
gpu
->
sim
.
stride
;
bufferX
/=
gpu
->
sim
.
paddedNumberOfAtoms
;
bufferY
/=
gpu
->
sim
.
paddedNumberOfAtoms
;
if
(
jj
==
1
){
//if( bufferX >= 62 || bufferY >= 62 ){
(
void
)
sprintf
(
buffer
,
"Block %4u thread %4u warp=%4u pos[%4u %4u] bufXY[%4u %4u]"
,
ii
,
jj
,
warp
,
pos
,
end
,
bufferX
,
bufferY
);
message
<<
buffer
;
(
void
)
sprintf
(
buffer
,
" x[%4u %4u] y[%4u %4u] excl=%u"
,
x
,
x
+
32
,
y
,
y
+
32
,
exclusion
);
message
<<
buffer
;
if
(
interactingWorkUnit
){
(
void
)
sprintf
(
buffer
,
" Flg=%d (-2=all) %u"
,
flagInt
,
flags
);
}
message
<<
buffer
;
message
<<
std
::
endl
;
}
pos
++
;
}
}
}
(
void
)
fprintf
(
stderr
,
"%s
\n\n
"
,
message
.
str
().
c_str
()
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaTypes.h
deleted
100755 → 0
View file @
c775bd19
#ifndef FREE_ENERGY_CUDA_TYPES_H
#define FREE_ENERGY_CUDA_TYPES_H
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <kernels/cudatypes.h>
#include <stdarg.h>
#include <limits>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <builtin_types.h>
#include <vector_functions.h>
enum
CudaFreeEnergyNonbondedMethod
{
FREE_ENERGY_NO_CUTOFF
,
FREE_ENERGY_CUTOFF
,
FREE_ENERGY_PERIODIC
};
struct
cudaFreeEnergyGmxSimulation
{
// Constants
unsigned
int
LJ14_count
;
// LJ count
int4
*
pLJ14ID
;
// LJ 14 particles ids
float4
*
pLJ14Parameter
;
// LJ 14 parameters
float
epsfac
;
// Epsilon factor for CDLJ calculations
CudaFreeEnergyNonbondedMethod
nonbondedMethod
;
// How to handle nonbonded interactions
float
nonbondedCutoff
;
// Cutoff distance for nonbonded interactions
float
nonbondedCutoffSqr
;
// Square of the cutoff distance for nonbonded interactions
float
periodicBoxSizeX
;
// The X dimension of the periodic box
float
periodicBoxSizeY
;
// The Y dimension of the periodic box
float
periodicBoxSizeZ
;
// The Z dimension of the periodic box
float
invPeriodicBoxSizeX
;
// The 1 over the X dimension of the periodic box
float
invPeriodicBoxSizeY
;
// The 1 over the Y dimension of the periodic box
float
invPeriodicBoxSizeZ
;
// The 1 over the Z dimension of the periodic box
float
recipBoxSizeX
;
// The X dimension of the reciprocal box for Ewald summation
float
recipBoxSizeY
;
// The Y dimension of the reciprocal box for Ewald summation
float
recipBoxSizeZ
;
// The Z dimension of the reciprocal box for Ewald summation
float
cellVolume
;
// Ewald parameter alpha (a.k.a. kappa)
float
reactionFieldK
;
// Constant for reaction field correction
float
reactionFieldC
;
// Constant for reaction field correction
float4
*
pSigEps4
;
// sigma, eps, lambda. charge
int
bornRadiiScalingMethod
;
// flag for method to use scaling radii (0=none,1=quintic spline)
float
quinticLowerLimitFactor
;
// lower limit factor for quintic spline
float
quinticUpperLimit
;
// upper limit for quintic spline
float
*
pSwitchDerivative
;
// switch deriviatives for quintic spline
float
*
pNonPolarScalingFactors
;
// non-polar scaling factors
};
#endif // FREE_ENERGY_CUDA_TYPES_H
plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyGpuTypes.h
deleted
100755 → 0
View file @
c775bd19
#ifndef __FREE_ENERGY_GPUTYPES_H__
#define __FREE_ENERGY_GPUTYPES_H__
/* -------------------------------------------------------------------------- *
* OpenMMFreeEnergy *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "kernels/gputypes.h"
#include "freeEnergyCudaTypes.h"
#include <map>
typedef
std
::
map
<
int
,
float
>
MapIntFloat
;
typedef
MapIntFloat
::
const_iterator
MapIntFloatCI
;
struct
_freeEnergyGpuContext
{
_gpuContext
*
gpuContext
;
cudaFreeEnergyGmxSimulation
freeEnergySim
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusions
;
CUDAStream
<
float4
>*
psSigEps4
;
CUDAStream
<
int4
>*
psLJ14ID
;
CUDAStream
<
float4
>*
psLJ14Parameter
;
CUDAStream
<
float
>*
psSwitchDerivative
;
CUDAStream
<
float
>*
psNonPolarScalingFactors
;
FILE
*
log
;
};
typedef
struct
_freeEnergyGpuContext
*
freeEnergyGpuContext
;
// Function prototypes
extern
"C"
freeEnergyGpuContext
freeEnergyGpuInit
(
_gpuContext
*
gpu
);
extern
"C"
void
freeEnergyGpuShutDown
(
freeEnergyGpuContext
gpu
);
extern
"C"
void
freeEnergyGpuSetConstants
(
freeEnergyGpuContext
gpu
);
extern
"C"
unsigned
int
getThreadsPerBlockFEP
(
freeEnergyGpuContext
freeEnergyGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
);
#endif // __FREE_ENERGY_GPUTYPES_H__
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "kernels/gputypes.h"
#include "kernels/cudatypes.h"
#include "kernels/cudaKernels.h"
#include "GpuFreeEnergyCudaKernels.h"
#include "openmm/OpenMMException.h"
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <iostream>
#include <sstream>
#define USE_SOFTCORE_LJ
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
q
;
float
sig
;
float
eps
;
float
br
;
float
softCoreLJLambda
;
float
fx
;
float
fy
;
float
fz
;
float
fb
;
};
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaFreeEnergyGmxSimulation
feSimDev
;
void
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
(
freeEnergyGpuContext
freeEnergyGpu
){
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
freeEnergyGpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
feSimDev
,
&
freeEnergyGpu
->
freeEnergySim
,
sizeof
(
cudaFreeEnergyGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to feSimDev failed"
);
}
// Include versions of the kernel for N^2 calculations with softcore LJ.
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2##b
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_SOFTCORE_LJ
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#undef USE_SOFTCORE_LJ
#undef USE_OUTPUT_BUFFER_PER_WARP
// Include versions of the kernel with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
// Include versions of the kernel with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
/**
*
* Calculate Born radii and first GBSA loop forces/energy
*
* @param gpu gpu context
*
*/
void
kCalculateCDLJObcGbsaSoftcoreForces1
(
freeEnergyGpuContext
freeEnergyGpu
)
{
unsigned
int
threadsPerBlock
;
static
unsigned
int
threadsPerBlockPerMethod
[
3
]
=
{
0
,
0
,
0
};
static
unsigned
int
natoms
[
3
]
=
{
0
,
0
,
0
};
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
unsigned
int
methodIndex
=
static_cast
<
unsigned
int
>
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
);
if
(
methodIndex
>
2
){
throw
OpenMM
::
OpenMMException
(
"kCalculateCDLJObcGbsaSoftcoreForces1 method index invalid."
);
}
if
(
natoms
[
methodIndex
]
!=
gpu
->
natoms
){
unsigned
int
extra
=
methodIndex
==
0
?
0
:
sizeof
(
float
);
threadsPerBlockPerMethod
[
methodIndex
]
=
std
::
min
(
getThreadsPerBlockFEP
(
freeEnergyGpu
,
(
sizeof
(
Atom
)
+
extra
),
gpu
->
sharedMemoryPerBlock
),
gpu
->
sim
.
nonbond_threads_per_block
);
natoms
[
methodIndex
]
=
gpu
->
natoms
;
}
threadsPerBlock
=
threadsPerBlockPerMethod
[
methodIndex
];
switch
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
)
{
case
FREE_ENERGY_NO_CUTOFF
:
// use softcore LJ potential
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaSoftcoreForces1"
);
break
;
case
FREE_ENERGY_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaSoftcoreCutoffForces1"
);
break
;
case
FREE_ENERGY_PERIODIC
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaSoftcorePeriodicForces1"
);
break
;
}
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating nonbonded forces and the first stage of GBSA.
* It is included several times in kCalculateCDLJObcGbsaForces1.cu with different #defines to generate
* different versions of the kernels.
*/
#define USE_SOFTCORE_LJ
#ifdef USE_SOFTCORE_LJ
#include "kSoftcoreLJ.h"
#endif
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
#ifdef DEBUG
void
METHOD_NAME
(
kCalculateCDLJObcGbsaSoftcore
,
Forces1_kernel
)(
unsigned
int
*
workUnit
,
float4
*
pdE1
,
float4
*
pdE2
)
#else
void
METHOD_NAME
(
kCalculateCDLJObcGbsaSoftcore
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
#endif
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
float
CDLJObcGbsa_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
blockDim
.
x
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float4
a
=
feSimDev
.
pSigEps4
[
i
];
float
softCoreLJLambda
=
a
.
z
;
float
br
=
cSim
.
pBornRadii
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
float4
af
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
af
.
w
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
a
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
sA
[
threadIdx
.
x
].
br
=
br
;
sA
[
threadIdx
.
x
].
softCoreLJLambda
=
softCoreLJLambda
;
float
q2
=
cSim
.
preFactor
*
a
.
w
;
a
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
invR
=
1
.
0
f
/
sqrtf
(
r2
);
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJObcGbsa_energy
);
#else
// CDLJ part
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
float
factorX
=
a
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
expf
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrtf
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
(
x
+
j
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
x
+
j
)
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
}
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
invR
=
1
.
0
f
/
sqrtf
(
r2
);
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJObcGbsa_energy
);
//float dEdR = getSoftCoreLJMod( (invR*sig), eps, softCoreLJLambda, psA[j].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
float
factorX
=
a
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
expf
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrtf
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
}
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
float4
of
=
cSim
.
pForce4
[
offset
];
float
bf
=
cSim
.
pBornForce
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
bf
+=
af
.
w
;
cSim
.
pForce4
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
bf
;
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float4
temp1
=
feSimDev
.
pSigEps4
[
j
];
sA
[
threadIdx
.
x
].
br
=
cSim
.
pBornRadii
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp1
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
softCoreLJLambda
=
temp1
.
z
;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fb
=
0
.
0
f
;
float
q2
=
a
.
w
*
cSim
.
preFactor
;
a
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
//else if (flags == 0xFFFFFFFF)
else
if
(
flags
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
invR
=
1
.
0
f
/
sqrtf
(
r2
);
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
tj
].
softCoreLJLambda
,
&
CDLJObcGbsa_energy
);
#else
// CDLJ part
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
float
factorX
=
a
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
expf
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrtf
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
tj
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
(
y
+
tj
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
y
+
tj
)
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
}
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
invR
=
1
.
0
f
/
sqrtf
(
r2
);
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJObcGbsa_energy
);
#else
// CDLJ part
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
float
factorX
=
a
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
expf
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrtf
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
(
y
+
j
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
y
+
j
)
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
}
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
// Sum the Born forces.
tempBuffer
[
threadIdx
.
x
]
=
dGpol_dalpha2_ij
*
br
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fb
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tempBuffer
[
threadIdx
.
x
]
=
dx
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
tempBuffer
[
threadIdx
.
x
]
=
dy
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
tempBuffer
[
threadIdx
.
x
]
=
dz
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
}
}
}
#endif
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
invR
=
1
.
0
f
/
sqrtf
(
r2
);
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
tj
].
softCoreLJLambda
,
&
CDLJObcGbsa_energy
);
#else
// CDLJ part
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
float
factorX
=
a
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
expf
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrtf
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
tj
].
q
)
/
denominator
;
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
}
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
float4
of
=
cSim
.
pForce4
[
offset
];
float
bf
=
cSim
.
pBornForce
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
bf
+=
af
.
w
;
cSim
.
pForce4
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
bf
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
#else
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
of
=
cSim
.
pForce4
[
offset
];
bf
=
cSim
.
pBornForce
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
bf
+=
sA
[
threadIdx
.
x
].
fb
;
cSim
.
pForce4
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
bf
;
lasty
=
y
;
}
pos
++
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "GpuFreeEnergyCudaKernels.h"
#include "freeEnergyGpuTypes.h"
#include "openmm/OpenMMException.h"
#include <cuda.h>
#include <iostream>
#include <sstream>
#define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaFreeEnergyGmxSimulation
gbviSimDev
;
void
SetCalculateGBVISoftcoreBornSumGpuSim
(
freeEnergyGpuContext
freeEnergyGpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
freeEnergyGpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateGBVISoftcoreBornSumGpuSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
gbviSimDev
,
&
freeEnergyGpu
->
freeEnergySim
,
sizeof
(
cudaFreeEnergyGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateGBVISoftcoreBornSumGpuSim copy to feSim failed"
);
}
// create, initialize and enter BornRadiusScaleFactors values (used to scale contribution of atoms to Born sum of other atoms)
extern
"C"
void
gpuSetGBVISoftcoreParameters
(
freeEnergyGpuContext
freeEnergyGpu
,
float
innerDielectric
,
float
solventDielectric
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
gamma
,
const
std
::
vector
<
float
>&
scaledRadii
,
const
std
::
vector
<
float
>&
bornRadiusScaleFactors
,
const
std
::
vector
<
float
>&
quinticSplineParameters
){
unsigned
int
numberOfParticles
=
radius
.
size
();
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
static
const
float
electricConstant
=
-
166.02691
f
;
double
tau
=
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
));
freeEnergyGpu
->
psSwitchDerivative
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SwitchDerivative"
);
freeEnergyGpu
->
freeEnergySim
.
pSwitchDerivative
=
freeEnergyGpu
->
psSwitchDerivative
->
_pDevData
;
// create gpuGBVISoftcore, load parameters, and track minimum softcore value
// gpuGBVISoftcore is not really being used (it was in the initial implementation) --
// will be removed in future once confirmed not needed
// check if quintic scaling to be applied
if
(
quinticSplineParameters
.
size
()
==
2
){
freeEnergyGpu
->
freeEnergySim
.
bornRadiiScalingMethod
=
1
;
freeEnergyGpu
->
freeEnergySim
.
quinticLowerLimitFactor
=
quinticSplineParameters
[
0
];
freeEnergyGpu
->
freeEnergySim
.
quinticUpperLimit
=
quinticSplineParameters
[
1
];
}
else
{
freeEnergyGpu
->
freeEnergySim
.
bornRadiiScalingMethod
=
0
;
freeEnergyGpu
->
freeEnergySim
.
quinticLowerLimitFactor
=
0.8
f
;
freeEnergyGpu
->
freeEnergySim
.
quinticUpperLimit
=
5.0
f
;
}
for
(
unsigned
int
ii
=
0
;
ii
<
bornRadiusScaleFactors
.
size
();
ii
++
){
(
*
gpu
->
psGBVIData
)[
ii
].
x
=
radius
[
ii
];
(
*
gpu
->
psGBVIData
)[
ii
].
y
=
scaledRadii
[
ii
];
(
*
gpu
->
psGBVIData
)[
ii
].
z
=
tau
*
gamma
[
ii
];
(
*
gpu
->
psGBVIData
)[
ii
].
w
=
bornRadiusScaleFactors
[
ii
];
(
*
gpu
->
psBornRadii
)[
ii
]
=
0.0
f
;
(
*
freeEnergyGpu
->
psSwitchDerivative
)[
ii
]
=
0.0
f
;
}
// Dummy out extra atom data
for
(
unsigned
int
ii
=
bornRadiusScaleFactors
.
size
();
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
*
gpu
->
psGBVIData
)[
ii
].
x
=
0.01
f
;
(
*
gpu
->
psGBVIData
)[
ii
].
y
=
0.01
f
;
(
*
gpu
->
psGBVIData
)[
ii
].
z
=
0.0
f
;
(
*
gpu
->
psGBVIData
)[
ii
].
w
=
0.0
f
;
(
*
gpu
->
psBornRadii
)[
ii
]
=
0.0
f
;
(
*
freeEnergyGpu
->
psSwitchDerivative
)[
ii
]
=
0.0
f
;
}
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
// diagnostics
if
(
freeEnergyGpu
->
log
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"GBVISoftcore: part.=%u padded=%u sclMeth=%d
\n
"
,
static_cast
<
unsigned
int
>
(
bornRadiusScaleFactors
.
size
()),
static_cast
<
unsigned
int
>
(
gpu
->
sim
.
paddedNumberOfAtoms
),
freeEnergyGpu
->
freeEnergySim
.
bornRadiiScalingMethod
);
if
(
quinticSplineParameters
.
size
()
==
2
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"QuinticScaling: LwFct=%8.3f UpLmt=[%12.5e (nm) %12.5e]
\n
"
,
freeEnergyGpu
->
freeEnergySim
.
quinticLowerLimitFactor
,
powf
(
freeEnergyGpu
->
freeEnergySim
.
quinticUpperLimit
,
-
0.3333333
f
),
freeEnergyGpu
->
freeEnergySim
.
quinticUpperLimit
);
}
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"gpuSetGBVISoftcoreParameters: preFactor=%14.6e elecCnstnt=%.4f frcCnvrsnFctr=%.4f tau=%.4f.
\n
"
,
gpu
->
sim
.
preFactor
,
2.0
f
*
electricConstant
,
gpu
->
sim
.
forceConversionFactor
,
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
);
#ifdef PARAMETER_PRINT
int
maxPrint
=
MAX_PARAMETER_PRINT
;
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
" radius scaled radius tau*gamma lambda
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
bornRadiusScaleFactors
.
size
();
ii
++
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%6u %14.7e %14.7e %14.7e %14.7e
\n
"
,
ii
,
(
*
gpu
->
psGBVIData
)[
ii
].
x
,
(
*
gpu
->
psGBVIData
)[
ii
].
y
,
(
*
gpu
->
psGBVIData
)[
ii
].
z
,
(
*
gpu
->
psGBVIData
)[
ii
].
w
);
if
(
ii
==
maxPrint
){
ii
=
bornRadiusScaleFactors
.
size
()
-
maxPrint
;
if
(
ii
<
maxPrint
)
ii
=
maxPrint
;
}
}
unsigned
int
offset
=
gpu
->
sim
.
paddedNumberOfAtoms
-
MAX_PARAMETER_PRINT
;
if
(
offset
>
0
&&
gpu
->
sim
.
paddedNumberOfAtoms
>
bornRadiusScaleFactors
.
size
()
){
for
(
unsigned
int
ii
=
offset
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%6u %14.7e %14.7e %14.7e %14.7e
\n
"
,
ii
,
(
*
gpu
->
psGBVIData
)[
ii
].
x
,
(
*
gpu
->
psGBVIData
)[
ii
].
y
,
(
*
gpu
->
psGBVIData
)[
ii
].
z
,
(
*
gpu
->
psGBVIData
)[
ii
].
w
);
}
}
#endif
}
gpu
->
psGBVIData
->
Upload
();
gpu
->
psBornRadii
->
Upload
();
freeEnergyGpu
->
psSwitchDerivative
->
Upload
();
return
;
}
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
r
;
float
sr
;
float
sum
;
float
gamma
;
float
bornRadiusScaleFactor
;
};
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearGBVISoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearGBVISoftcoreBornSum
(
gpuContext
gpu
)
{
kClearGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVISoftcoreBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float4
gbviData
=
cSim
.
pGBVIData
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
ratio
=
(
gbviData
.
x
/
bornRadius
);
float
ratio3
=
ratio
*
ratio
*
ratio
;
energy
-=
gbviData
.
z
*
ratio3
;
// gbviData.z = gamma*tau
totalForce
+=
(
3.0
f
*
gbviData
.
z
*
ratio3
)
/
bornRadius
;
// 'cavity' term
float
br2
=
bornRadius
*
bornRadius
;
totalForce
*=
(
1.0
f
/
3.0
f
)
*
br2
*
br2
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
void
kReduceGBVISoftcoreBornForces
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kReduceGBVISoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVISoftcoreBornForces"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVISoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
;
float4
atom
=
cSim
.
pGBVIData
[
pos
];
// Get summed Born data
for
(
int
i
=
0
;
i
<
cSim
.
nonbondOutputBuffers
;
i
++
)
{
sum
+=
*
pSt
;
pSt
+=
cSim
.
stride
;
}
// Now calculate Born radius
float
Rinv
=
1.0
f
/
atom
.
x
;
sum
=
Rinv
*
Rinv
*
Rinv
-
sum
;
cSim
.
pBornRadii
[
pos
]
=
pow
(
sum
,
(
-
1.0
f
/
3.0
f
)
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceGBVISoftcoreBornSum
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kReduceGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVISoftcoreBornSum"
);
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
__device__
void
quinticSpline
(
float
x
,
float
rl
,
float
ru
,
float
*
outValue
,
float
*
outDerivative
)
{
float
numerator
=
x
-
rl
;
float
denominator
=
ru
-
rl
;
float
ratio
=
numerator
/
denominator
;
float
ratio2
=
ratio
*
ratio
;
float
ratio3
=
ratio2
*
ratio
;
*
outValue
=
1.0
f
+
ratio3
*
(
-
10.
f
+
3.0
f
*
ratio
*
(
5.0
f
-
2.0
f
*
ratio
));
*
outDerivative
=
-
30.0
f
*
ratio2
*
(
1.0
f
+
ratio
*
(
ratio
-
2.0
f
))
/
denominator
;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornSumQuinticScaling_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
;
float4
atom
=
cSim
.
pGBVIData
[
pos
];
// Get summed Born data
for
(
int
i
=
0
;
i
<
cSim
.
nonbondOutputBuffers
;
i
++
)
{
sum
+=
*
pSt
;
pSt
+=
cSim
.
stride
;
}
// Now calculate Born radius
float
Rinv
=
1.0
f
/
atom
.
x
;
float
r3
=
Rinv
*
Rinv
*
Rinv
;
float
splineL
=
gbviSimDev
.
quinticLowerLimitFactor
*
r3
;
float
switchDeriviative
;
if
(
sum
>
splineL
){
if
(
sum
<
r3
){
float
splineValue
,
splineDerivative
;
quinticSpline
(
sum
,
splineL
,
r3
,
&
splineValue
,
&
splineDerivative
);
switchDeriviative
=
splineValue
-
(
r3
-
sum
)
*
splineDerivative
;
sum
=
(
r3
-
sum
)
*
splineValue
+
gbviSimDev
.
quinticUpperLimit
;
}
else
{
sum
=
gbviSimDev
.
quinticUpperLimit
;
switchDeriviative
=
0.0
f
;
}
}
else
{
sum
=
r3
-
sum
;
switchDeriviative
=
1.0
f
;
}
cSim
.
pBornRadii
[
pos
]
=
pow
(
sum
,
(
-
1.0
f
/
3.0
f
)
);
gbviSimDev
.
pSwitchDerivative
[
pos
]
=
switchDeriviative
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceGBVIBornSumQuinticScaling
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kReduceGBVIBornSumQuinticScaling_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVIBornSumQuinticScaling_kernel"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornForcesQuinticScaling_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float4
gbviData
=
cSim
.
pGBVIData
[
pos
];
float
switchDeriv
=
gbviSimDev
.
pSwitchDerivative
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
ratio
=
(
gbviData
.
x
/
bornRadius
);
float
ratio3
=
ratio
*
ratio
*
ratio
;
energy
-=
gbviData
.
z
*
ratio3
;
totalForce
+=
(
3.0
f
*
gbviData
.
z
*
ratio3
)
/
bornRadius
;
// 'cavity' term
float
br2
=
bornRadius
*
bornRadius
;
totalForce
*=
(
1.0
f
/
3.0
f
)
*
br2
*
br2
*
switchDeriv
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
void
kReduceGBVIBornForcesQuinticScaling
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
//(void) fprintf( stderr, "kReduceObcGbsaBornForces %6d blks=%u bsf_reduce_threads_per_block=%5u %5u %5u %5u %5u\n",
// gpu->natoms, gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block, gpu->sim.bf_reduce_threads_per_block,
// GF1XX_THREADS_PER_BLOCK, GT2XX_THREADS_PER_BLOCK, G8X_THREADS_PER_BLOCK); fflush( stderr );
kReduceGBVIBornForcesQuinticScaling_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVIBornForcesQuinticScaling"
);
}
void
kPrintGBVISoftcore
(
freeEnergyGpuContext
freeEnergyGpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
//int maxPrint = gpu->natoms;
gpu
->
psGBVIData
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psPosq4
->
Download
();
CUDAStream
<
float
>*
switchDeriviative
=
freeEnergyGpu
->
psSwitchDerivative
;
CUDAStream
<
float4
>*
sigEps4
=
freeEnergyGpu
->
psSigEps4
;
switchDeriviative
->
Download
();
sigEps4
->
Download
();
(
void
)
fprintf
(
log
,
"kPrintGBVISoftcore Cuda Softcore bR bF swd prm sigeps4
\n
"
);
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
log
,
"%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e
\n
"
,
ii
,
gpu
->
psBornRadii
->
_pSysData
[
ii
],
gpu
->
psBornForce
->
_pSysData
[
ii
],
switchDeriviative
->
_pSysData
[
ii
],
gpu
->
psGBVIData
->
_pSysData
[
ii
].
x
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
y
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
z
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
w
,
sigEps4
->
_pSysData
[
ii
].
x
,
sigEps4
->
_pSysData
[
ii
].
y
,
sigEps4
->
_pSysData
[
ii
].
z
,
sigEps4
->
_pSysData
[
ii
].
w
);
}
}
extern
__global__
void
kFindBlockBoundsCutoff_kernel
();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
void
kCalculateGBVISoftcoreBornSum
(
freeEnergyGpuContext
freeEnergyGpu
)
{
unsigned
int
threadsPerBlock
;
static
unsigned
int
threadsPerBlockPerMethod
[
3
]
=
{
0
,
0
,
0
};
static
unsigned
int
natoms
[
3
]
=
{
0
,
0
,
0
};
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
unsigned
int
methodIndex
=
static_cast
<
unsigned
int
>
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
);
if
(
methodIndex
>
2
){
throw
OpenMM
::
OpenMMException
(
"kCalculateGBVISoftcoreBornSum method index invalid."
);
}
if
(
natoms
[
methodIndex
]
!=
gpu
->
natoms
){
unsigned
int
extra
=
methodIndex
==
0
?
0
:
sizeof
(
float
);
threadsPerBlockPerMethod
[
methodIndex
]
=
std
::
min
(
getThreadsPerBlockFEP
(
freeEnergyGpu
,
(
sizeof
(
Atom
)
+
extra
),
gpu
->
sharedMemoryPerBlock
),
gpu
->
sim
.
nonbond_threads_per_block
);
natoms
[
methodIndex
]
=
gpu
->
natoms
;
}
threadsPerBlock
=
threadsPerBlockPerMethod
[
methodIndex
];
kClearGBVISoftcoreBornSum
(
gpu
);
LAUNCHERROR
(
"kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum"
);
switch
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
)
{
case
FREE_ENERGY_NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
}
else
{
kCalculateGBVISoftcoreN2BornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
}
break
;
case
FREE_ENERGY_CUTOFF
:
kFindBlockBoundsCutoff_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsCutoff"
);
kFindBlocksWithInteractionsCutoff_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsCutoff"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
unsigned
int
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVISoftcoreCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
FREE_ENERGY_PERIODIC
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
unsigned
int
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVISoftcorePeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
default:
throw
OpenMM
::
OpenMMException
(
"Nonbonded softcore method not recognized."
);
}
LAUNCHERROR
(
"kCalculateGBVISoftcoreBornSum"
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for calculating Born sums. It is included
* several times in kCalculateGBVIBornSum.cu with different #defines to generate
* different versions of the kernels.
*/
#include "kCalculateGBVIAux.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateGBVISoftcore
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
r
;
// forces tgx into interval [0,31]
// forces tbx 0
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float4
ar
=
cSim
.
pGBVIData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
sA
[
threadIdx
.
x
].
bornRadiusScaleFactor
=
ar
.
w
;
float
bSum
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#if defined USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
&&
j
!=
tgx
)
#else
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
j
!=
tgx
)
#endif
{
bSum
+=
psA
[
j
].
bornRadiusScaleFactor
*
getGBVI_Volume
(
sqrtf
(
r2
),
ar
.
x
,
psA
[
j
].
sr
);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
bSum
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
bSum
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
unsigned
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float4
temp1
=
cSim
.
pGBVIData
[
j
];
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float4
ar
=
cSim
.
pGBVIData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
bornRadiusScaleFactor
=
temp1
.
w
;
sA
[
threadIdx
.
x
].
sum
=
0
.
0
f
;
apos
.
w
=
0
.
0
f
;
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
//else if (flags )
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
)
#endif
{
r
=
sqrtf
(
r2
);
// psA[tj].sr = Sj
// ar.x = Ri
apos
.
w
+=
psA
[
tj
].
bornRadiusScaleFactor
*
getGBVI_Volume
(
r
,
ar
.
x
,
psA
[
tj
].
sr
);
psA
[
tj
].
sum
+=
ar
.
w
*
getGBVI_Volume
(
r
,
psA
[
tj
].
r
,
ar
.
y
);
}
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
tempBuffer
[
threadIdx
.
x
]
=
0
.
0
f
;
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
)
#endif
{
r
=
sqrtf
(
r2
);
tempBuffer
[
threadIdx
.
x
]
=
ar
.
w
*
getGBVI_Volume
(
r
,
psA
[
j
].
r
,
ar
.
y
);
apos
.
w
+=
psA
[
j
].
bornRadiusScaleFactor
*
getGBVI_Volume
(
r
,
ar
.
x
,
psA
[
j
].
sr
);
}
// Sum the terms.
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
sum
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
}
}
}
#endif
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
sA
[
threadIdx
.
x
].
sum
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
sA
[
threadIdx
.
x
].
sum
;
#endif
}
pos
++
;
}
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "GpuFreeEnergyCudaKernels.h"
#include "openmm/OpenMMException.h"
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
r
;
float
sr
;
float
fx
;
float
fy
;
float
fz
;
float
fb
;
float
bornRadiusScaleFactor
;
};
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCalculateGBVISoftcoreForces2Sim
(
freeEnergyGpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateGBVISoftcoreForces2Sim copy to cSim failed"
);
}
#include "kCalculateGBVIAux.h"
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
void
kCalculateGBVISoftcoreForces2
(
freeEnergyGpuContext
freeEnergyGpu
)
{
unsigned
int
threadsPerBlock
;
static
unsigned
int
threadsPerBlockPerMethod
[
3
]
=
{
0
,
0
,
0
};
static
unsigned
int
natoms
[
3
]
=
{
0
,
0
,
0
};
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
unsigned
int
methodIndex
=
static_cast
<
unsigned
int
>
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
);
if
(
methodIndex
>
2
){
throw
OpenMM
::
OpenMMException
(
"kCalculateGBVISoftcoreForces2 method index invalid."
);
}
if
(
natoms
[
methodIndex
]
!=
gpu
->
natoms
){
unsigned
int
extra
=
methodIndex
==
0
?
0
:
sizeof
(
float3
);
threadsPerBlockPerMethod
[
methodIndex
]
=
std
::
min
(
getThreadsPerBlockFEP
(
freeEnergyGpu
,
(
sizeof
(
Atom
)
+
extra
),
gpu
->
sharedMemoryPerBlock
),
gpu
->
sim
.
nonbond_threads_per_block
);
natoms
[
methodIndex
]
=
gpu
->
natoms
;
}
threadsPerBlock
=
threadsPerBlockPerMethod
[
methodIndex
];
switch
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
)
{
case
FREE_ENERGY_NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateGBVISoftcoreN2Forces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
sizeof
(
Atom
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
FREE_ENERGY_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcoreCutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVISoftcoreCutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
FREE_ENERGY_PERIODIC
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcorePeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVISoftcorePeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
threadsPerBlock
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateGBVISoftcoreForces2"
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evaluating the second stage of GB/VI. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateGBVISoftcore
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
blockDim
.
x
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float4
ar
=
cSim
.
pGBVIData
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
float3
af
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
sA
[
threadIdx
.
x
].
bornRadiusScaleFactor
=
ar
.
w
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrtf
(
r2
);
// Atom I Born forces and sum
float
dE
=
psA
[
j
].
bornRadiusScaleFactor
*
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
j
].
sr
,
fb
);
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
(
i
==
(
x
+
j
))
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
(
i
==
(
x
+
j
))
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
j
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
j
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
j
].
fz
+=
d
;
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float4
temp1
=
cSim
.
pGBVIData
[
j
];
float
fb
=
cSim
.
pBornForce
[
j
];
sA
[
threadIdx
.
x
].
fb
=
fb
;
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
bornRadiusScaleFactor
=
temp1
.
w
;
}
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
//else if (flags == 0xFFFFFFFF)
else
if
(
flags
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrtf
(
r2
);
float
dE
=
psA
[
tj
].
bornRadiusScaleFactor
*
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
tj
].
sr
,
fb
);
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
tj
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
tj
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
tj
].
fz
+=
d
;
// Atom J Born sum term
dE
=
ar
.
w
*
getGBVI_dE2
(
r
,
psA
[
tj
].
r
,
ar
.
y
,
psA
[
tj
].
fb
);
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
)
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrtf
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
float
dE
=
psA
[
j
].
bornRadiusScaleFactor
*
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
j
].
sr
,
fb
);
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
tempBuffer
[
threadIdx
.
x
].
x
=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
tempBuffer
[
threadIdx
.
x
].
y
=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
tempBuffer
[
threadIdx
.
x
].
z
=
d
;
// Atom J Born sum term
dE
=
ar
.
w
*
getGBVI_dE2
(
r
,
psA
[
j
].
r
,
ar
.
y
,
psA
[
j
].
fb
);
#if defined USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
)
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
tempBuffer
[
threadIdx
.
x
].
x
+=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
+=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
#endif
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4
[
offset
]
=
of
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
}
lasty
=
y
;
pos
++
;
}
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "GpuFreeEnergyCudaKernels.h"
#include "freeEnergyGpuTypes.h"
#include <cudatypes.h>
#include "kSoftcoreLJ.h"
#define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaFreeEnergyGmxSimulation
feSim
;
void
SetCalculateLocalSoftcoreGpuSim
(
freeEnergyGpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateLocalSoftcoreGpuSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
feSim
,
&
gpu
->
freeEnergySim
,
sizeof
(
cudaFreeEnergyGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateLocalSoftcoreGpuSim copy to feSim failed"
);
}
extern
"C"
void
gpuSetLJ14SoftcoreParameters
(
freeEnergyGpuContext
gpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
qProd
,
const
std
::
vector
<
float
>&
softcoreLJLambdaArray
){
unsigned
int
LJ14s
=
atom1
.
size
();
gpu
->
freeEnergySim
.
LJ14_count
=
LJ14s
;
gpu
->
psLJ14ID
=
new
CUDAStream
<
int4
>
(
LJ14s
,
1
,
"LJ14SoftcoreID"
);
CUDAStream
<
int4
>*
psLJ14ID
=
gpu
->
psLJ14ID
;
gpu
->
freeEnergySim
.
pLJ14ID
=
psLJ14ID
->
_pDevData
;
gpu
->
psLJ14Parameter
=
new
CUDAStream
<
float4
>
(
LJ14s
,
1
,
"LJ14SoftcoreParameter"
);
CUDAStream
<
float4
>*
psLJ14Parameter
=
gpu
->
psLJ14Parameter
;
gpu
->
freeEnergySim
.
pLJ14Parameter
=
psLJ14Parameter
->
_pDevData
;
std
::
vector
<
int
>
outputBufferCounter
(
gpu
->
gpuContext
->
sim
.
atoms
,
0
);
for
(
int
ii
=
0
;
ii
<
LJ14s
;
ii
++
){
(
*
psLJ14ID
)[
ii
].
x
=
atom1
[
ii
];
(
*
psLJ14ID
)[
ii
].
y
=
atom2
[
ii
];
(
*
psLJ14ID
)[
ii
].
z
=
outputBufferCounter
[
atom1
[
ii
]]
++
;
(
*
psLJ14ID
)[
ii
].
w
=
outputBufferCounter
[
atom2
[
ii
]]
++
;
float
p0
,
p1
,
p2
,
p3
;
if
(
c12
[
ii
]
==
0.0
f
){
p0
=
0.0
f
;
p1
=
1.0
f
;
}
else
{
p0
=
c6
[
ii
]
*
c6
[
ii
]
/
c12
[
ii
];
p1
=
pow
(
c12
[
ii
]
/
c6
[
ii
],
1.0
f
/
6.0
f
);
}
p2
=
epsfac
*
qProd
[
ii
];
p3
=
softcoreLJLambdaArray
[
ii
];
(
*
psLJ14Parameter
)[
ii
].
x
=
p0
;
(
*
psLJ14Parameter
)[
ii
].
y
=
p1
;
(
*
psLJ14Parameter
)[
ii
].
z
=
p2
;
(
*
psLJ14Parameter
)[
ii
].
w
=
p3
;
}
// logging info
if
(
gpu
->
log
){
(
void
)
fprintf
(
gpu
->
log
,
"gpuSetLJ14SoftcoreParameters: number of 1-4 bonds=%5u
\n
"
,
LJ14s
);
#ifdef PARAMETER_PRINT
unsigned
int
maxPrint
=
MAX_PARAMETER_PRINT
;
for
(
unsigned
int
ii
=
0
;
ii
<
LJ14s
;
ii
++
){
(
void
)
fprintf
(
gpu
->
log
,
" %5d [%5d %5d %5d %5d] %15.7e %15.7e %15.7e %15.7e
\n
"
,
ii
,
(
*
psLJ14ID
)[
ii
].
x
,
(
*
psLJ14ID
)[
ii
].
y
,
(
*
psLJ14ID
)[
ii
].
z
,
(
*
psLJ14ID
)[
ii
].
w
,
(
*
psLJ14Parameter
)[
ii
].
x
,
(
*
psLJ14Parameter
)[
ii
].
y
,
(
*
psLJ14Parameter
)[
ii
].
z
/
epsfac
,
(
*
psLJ14Parameter
)[
ii
].
w
);
if
(
ii
==
maxPrint
){
(
void
)
fprintf
(
gpu
->
log
,
"
\n
"
);
ii
=
LJ14s
-
maxPrint
;
if
(
ii
<
maxPrint
)
ii
=
maxPrint
;
}
}
(
void
)
fprintf
(
gpu
->
log
,
"
\n
"
);
#endif
(
void
)
fflush
(
gpu
->
log
);
}
psLJ14ID
->
Upload
();
psLJ14Parameter
->
Upload
();
return
;
}
#define DOT3(v1, v2) (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z)
__global__
void
kCalculateLocalSoftcoreForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
float
energy
=
0.0
f
;
if
(
feSim
.
nonbondedMethod
==
NO_CUTOFF
)
{
while
(
pos
<
feSim
.
LJ14_count
)
{
int4
atom
=
feSim
.
pLJ14ID
[
pos
];
float4
LJ14
=
feSim
.
pLJ14Parameter
[
pos
];
float4
a1
=
cSim
.
pPosq
[
atom
.
x
];
float4
a2
=
cSim
.
pPosq
[
atom
.
y
];
float3
d
;
d
.
x
=
a1
.
x
-
a2
.
x
;
d
.
y
=
a1
.
y
-
a2
.
y
;
d
.
z
=
a1
.
z
-
a2
.
z
;
float
r2
=
DOT3
(
d
,
d
);
float
inverseR
=
1.0
f
/
sqrtf
(
r2
);
#ifdef USE_SOFTCORE_LJ
float
CDLJ_energy
=
0.0
f
;
float
dEdR
=
getSoftCoreLJ
(
r2
,
LJ14
.
y
,
LJ14
.
x
,
LJ14
.
w
,
LJ14
.
w
,
&
CDLJ_energy
);
energy
+=
CDLJ_energy
;
#else
float
sig2
=
inverseR
*
LJ14
.
y
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
LJ14
.
x
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
energy
+=
LJ14
.
x
*
(
sig6
-
1.0
f
)
*
sig6
;
#endif
energy
+=
LJ14
.
z
*
inverseR
;
dEdR
+=
LJ14
.
z
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
unsigned
int
offsetA
=
atom
.
x
+
atom
.
z
*
cSim
.
stride
;
unsigned
int
offsetB
=
atom
.
y
+
atom
.
w
*
cSim
.
stride
;
float4
forceA
=
cSim
.
pForce4
[
offsetA
];
float4
forceB
=
cSim
.
pForce4
[
offsetB
];
d
.
x
*=
dEdR
;
d
.
y
*=
dEdR
;
d
.
z
*=
dEdR
;
forceA
.
x
+=
d
.
x
;
forceA
.
y
+=
d
.
y
;
forceA
.
z
+=
d
.
z
;
forceB
.
x
-=
d
.
x
;
forceB
.
y
-=
d
.
y
;
forceB
.
z
-=
d
.
z
;
cSim
.
pForce4
[
offsetA
]
=
forceA
;
cSim
.
pForce4
[
offsetB
]
=
forceB
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
else
if
(
feSim
.
nonbondedMethod
==
CUTOFF
)
{
float
LJ14_energy
;
while
(
pos
<
feSim
.
LJ14_count
){
int4
atom
=
feSim
.
pLJ14ID
[
pos
];
float4
LJ14
=
feSim
.
pLJ14Parameter
[
pos
];
float4
a1
=
cSim
.
pPosq
[
atom
.
x
];
float4
a2
=
cSim
.
pPosq
[
atom
.
y
];
float3
d
;
d
.
x
=
a1
.
x
-
a2
.
x
;
d
.
y
=
a1
.
y
-
a2
.
y
;
d
.
z
=
a1
.
z
-
a2
.
z
;
float
r2
=
DOT3
(
d
,
d
);
float
inverseR
=
1.0
f
/
sqrtf
(
r2
);
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
LJ14
.
y
,
LJ14
.
x
,
LJ14
.
w
,
LJ14
.
w
,
&
LJ14_energy
);
#else
float
sig2
=
inverseR
*
LJ14
.
y
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
LJ14
.
x
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
LJ14_energy
=
LJ14
.
x
*
(
sig6
-
1.0
f
)
*
sig6
;
#endif
LJ14_energy
+=
LJ14
.
z
*
(
inverseR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
dEdR
+=
LJ14
.
z
*
(
inverseR
-
2.0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
*=
inverseR
*
inverseR
;
if
(
r2
>
feSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0.0
f
;
LJ14_energy
=
0.0
f
;
}
energy
+=
LJ14_energy
;
unsigned
int
offsetA
=
atom
.
x
+
atom
.
z
*
cSim
.
stride
;
unsigned
int
offsetB
=
atom
.
y
+
atom
.
w
*
cSim
.
stride
;
float4
forceA
=
cSim
.
pForce4
[
offsetA
];
float4
forceB
=
cSim
.
pForce4
[
offsetB
];
d
.
x
*=
dEdR
;
d
.
y
*=
dEdR
;
d
.
z
*=
dEdR
;
forceA
.
x
+=
d
.
x
;
forceA
.
y
+=
d
.
y
;
forceA
.
z
+=
d
.
z
;
forceB
.
x
-=
d
.
x
;
forceB
.
y
-=
d
.
y
;
forceB
.
z
-=
d
.
z
;
cSim
.
pForce4
[
offsetA
]
=
forceA
;
cSim
.
pForce4
[
offsetB
]
=
forceB
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
else
if
(
feSim
.
nonbondedMethod
==
PERIODIC
){
float
LJ14_energy
;
while
(
pos
<
feSim
.
LJ14_count
){
int4
atom
=
feSim
.
pLJ14ID
[
pos
];
float4
LJ14
=
feSim
.
pLJ14Parameter
[
pos
];
float4
a1
=
cSim
.
pPosq
[
atom
.
x
];
float4
a2
=
cSim
.
pPosq
[
atom
.
y
];
float3
d
;
d
.
x
=
a1
.
x
-
a2
.
x
;
d
.
y
=
a1
.
y
-
a2
.
y
;
d
.
z
=
a1
.
z
-
a2
.
z
;
d
.
x
-=
floorf
(
d
.
x
/
cSim
.
periodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
d
.
y
-=
floorf
(
d
.
y
/
cSim
.
periodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
d
.
z
-=
floorf
(
d
.
z
/
cSim
.
periodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
DOT3
(
d
,
d
);
float
inverseR
=
1.0
f
/
sqrtf
(
r2
);
#ifdef USE_SOFTCORE_LJ
float
dEdR
=
getSoftCoreLJ
(
r2
,
LJ14
.
y
,
LJ14
.
x
,
LJ14
.
w
,
LJ14
.
w
,
&
LJ14_energy
);
#else
float
sig2
=
inverseR
*
LJ14
.
y
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
LJ14
.
x
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
LJ14_energy
=
LJ14
.
x
*
(
sig6
-
1.0
f
)
*
sig6
;
#endif
LJ14_energy
+=
LJ14
.
z
*
(
inverseR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
dEdR
+=
LJ14
.
z
*
(
inverseR
-
2.0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
*=
inverseR
*
inverseR
;
if
(
r2
>
feSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0.0
f
;
LJ14_energy
=
0.0
f
;
}
energy
+=
LJ14_energy
;
unsigned
int
offsetA
=
atom
.
x
+
atom
.
z
*
cSim
.
stride
;
unsigned
int
offsetB
=
atom
.
y
+
atom
.
w
*
cSim
.
stride
;
float4
forceA
=
cSim
.
pForce4
[
offsetA
];
float4
forceB
=
cSim
.
pForce4
[
offsetB
];
d
.
x
*=
dEdR
;
d
.
y
*=
dEdR
;
d
.
z
*=
dEdR
;
forceA
.
x
+=
d
.
x
;
forceA
.
y
+=
d
.
y
;
forceA
.
z
+=
d
.
z
;
forceB
.
x
-=
d
.
x
;
forceB
.
y
-=
d
.
y
;
forceB
.
z
-=
d
.
z
;
cSim
.
pForce4
[
offsetA
]
=
forceA
;
cSim
.
pForce4
[
offsetB
]
=
forceB
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
void
kCalculateLocalSoftcoreForces
(
freeEnergyGpuContext
freeEnergyGpuContext
)
{
gpuContext
gpu
=
freeEnergyGpuContext
->
gpuContext
;
kCalculateLocalSoftcoreForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
localForces_threads_per_block
,
gpu
->
sim
.
localForces_threads_per_block
*
sizeof
(
Vectors
)
>>>
();
LAUNCHERROR
(
"kCalculateLocalSoftcoreForces"
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "freeEnergyGpuTypes.h"
#include "GpuFreeEnergyCudaKernels.h"
#include "openmm/OpenMMException.h"
#include <iostream>
#include <sstream>
#define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10
// device handles
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaFreeEnergyGmxSimulation
feSimDev
;
// write address of structs to devices
void
SetCalculateCDLJSoftcoreGpuSim
(
freeEnergyGpuContext
freeEnergyGpu
){
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
freeEnergyGpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateCDLJSoftcoreGpuSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
feSimDev
,
&
freeEnergyGpu
->
freeEnergySim
,
sizeof
(
cudaFreeEnergyGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateCDLJSoftcoreGpuSim copy to feSimDev failed"
);
}
extern
"C"
bool
gpuIsAvailableSoftcore
()
{
int
deviceCount
;
cudaGetDeviceCount
(
&
deviceCount
);
return
(
deviceCount
>
0
);
}
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
q
;
float
sig
;
float
eps
;
float
softCoreLJLambda
;
float
fx
;
float
fy
;
float
fz
;
};
// Include versions of the kernels for N^2 calculations with softcore LJ.
#define USE_SOFTCORE_LJ
#ifdef USE_SOFTCORE_LJ
#include "kSoftcoreLJ.h"
#endif
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2SoftcoreLJ##b
#undef USE_OUTPUT_BUFFER_PER_WARP
#include "kCalculateNonbondedSoftcore.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2SoftcoreLJByWarp##b
#include "kCalculateNonbondedSoftcore.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateNonbondedSoftcore.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateNonbondedSoftcore.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateNonbondedSoftcore.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateNonbondedSoftcore.h"
void
kCalculateCDLJSoftcoreForces
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
switch
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
)
{
case
FREE_ENERGY_NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJSoftcoreN2SoftcoreLJByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJSoftcoreN2SoftcoreLJForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJSoftcoreN2Forces"
);
break
;
case
FREE_ENERGY_CUTOFF
:
kFindBlockBoundsCutoff_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsCutoff"
);
kFindBlocksWithInteractionsCutoff_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsCutoff"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJSoftcoreCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJSoftcoreCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJSoftcoreCutoffForces"
);
break
;
case
FREE_ENERGY_PERIODIC
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJSoftcorePeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJSoftcorePeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJSoftcorePeriodicForces"
);
break
;
default:
throw
OpenMM
::
OpenMMException
(
"Nonbonded softcore method not recognized."
);
}
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.h
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evaluating nonbonded softcore forces. It is included
* several times in kCalculateNonbondedSoftcore.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateCDLJSoftcore
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
float
CDLJ_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float3
af
;
// Local atom fx, fy, fz
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
invR
;
float
sig
;
float
eps
;
float
dEdR
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float4
a
=
feSimDev
.
pSigEps4
[
i
];
float
softCoreLJLambda
=
a
.
z
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
a
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
sA
[
threadIdx
.
x
].
softCoreLJLambda
=
a
.
z
;
a
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrtf
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJ_energy
);
#else
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJ_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
CDLJ_energy
=
0
.
0
f
;
}
#endif
energy
+=
0
.
5
f
*
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrtf
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJ_energy
);
#else
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJ_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
CDLJ_energy
=
0
.
0
f
;
}
energy
+=
0
.
5
f
*
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
//float2 temp1 = cSim.pAttr[j];
float4
temp1
=
feSimDev
.
pSigEps4
[
j
];
//float temp3 = cSim.pSoftCoreLJLambda[j];
//float temp3 = softCoreLJLambdaArray[j];
float
temp3
=
temp1
.
z
;
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp1
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
softCoreLJLambda
=
temp3
;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
a
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrtf
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
#ifdef USE_SOFTCORE_LJ
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
tj
].
softCoreLJLambda
,
&
CDLJ_energy
);
#else
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJ_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJ_energy
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
invR
;
CDLJ_energy
+=
a
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
CDLJ_energy
=
0
.
0
f
;
}
#endif
energy
+=
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrtf
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
#ifdef USE_SOFTCORE_LJ
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
j
].
softCoreLJLambda
,
&
CDLJ_energy
);
#else
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJ_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
dEdR
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
CDLJ_energy
+=
a
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
CDLJ_energy
=
0
.
0
f
;
}
#endif
energy
+=
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tempBuffer
[
threadIdx
.
x
].
x
=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
#endif
}
else
// bExclusion
{
// Read fixed atom data into registers and GRF
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrtf
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
#ifdef USE_SOFTCORE_LJ
dEdR
=
getSoftCoreLJ
(
r2
,
sig
,
eps
,
softCoreLJLambda
,
psA
[
tj
].
softCoreLJLambda
,
&
CDLJ_energy
);
#else
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJ_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#endif
#ifdef USE_CUTOFF
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
feSimDev
.
reactionFieldK
*
r2
);
CDLJ_energy
+=
a
.
w
*
psA
[
tj
].
q
*
(
invR
+
feSimDev
.
reactionFieldK
*
r2
-
feSimDev
.
reactionFieldC
);
#else
dEdR
+=
a
.
w
*
psA
[
tj
].
q
*
invR
;
CDLJ_energy
+=
a
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
CDLJ_energy
=
0
.
0
f
;
}
energy
+=
CDLJ_energy
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
lasty
=
y
;
}
pos
++
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
deleted
100755 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
#include "GpuFreeEnergyCudaKernels.h"
#include "kernels/cudaKernels.h"
#include "openmm/OpenMMException.h"
#include <cuda.h>
#define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
r
;
float
sr
;
float
sum
;
float
polarScaleData
;
};
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaFreeEnergyGmxSimulation
gbsaSimDev
;
extern
"C"
void
SetCalculateObcGbsaSoftcoreBornSumSim
(
freeEnergyGpuContext
freeEnergyGpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
freeEnergyGpu
->
gpuContext
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreBornSumSim copy to cSim failed."
);
status
=
cudaMemcpyToSymbol
(
gbsaSimDev
,
&
freeEnergyGpu
->
freeEnergySim
,
sizeof
(
cudaFreeEnergyGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreBornSumSim copy to gbsaSimDev failed."
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearObcGbsaSoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearSoftcoreBornForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornForce
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearSoftcoreBornForces
(
gpuContext
gpu
)
{
// printf("kClearSoftcoreBornForces\n");
kClearSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kClearSoftcoreBornForces"
);
}
void
kClearObcGbsaSoftcoreBornSum
(
gpuContext
gpu
)
{
// printf("kClearObcGbsaBornSum\n");
kClearObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaSoftcoreBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
float2
obcData
=
cSim
.
pObcData
[
pos
];
float
nonPolarScaleData
=
gbsaSimDev
.
pNonPolarScalingFactors
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
r
=
(
obcData
.
x
+
cSim
.
dielectricOffset
+
cSim
.
probeRadius
);
float
ratio6
=
pow
((
obcData
.
x
+
cSim
.
dielectricOffset
)
/
bornRadius
,
6.0
f
);
float
saTerm
=
nonPolarScaleData
*
cSim
.
surfaceAreaFactor
*
r
*
r
*
ratio6
;
totalForce
+=
saTerm
/
bornRadius
;
// 1.102 == Temp mysterious fudge factor, FIX FIX FIX
energy
+=
saTerm
;
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
// correct for surface area factor of -6
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
}
void
kReduceObcGbsaSoftcoreBornForces
(
gpuContext
gpu
){
kReduceObcGbsaSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaSoftcoreBornForces"
);
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaSoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
;
float2
atom
=
cSim
.
pObcData
[
pos
];
// Get summed Born data
for
(
int
i
=
0
;
i
<
cSim
.
nonbondOutputBuffers
;
i
++
){
sum
+=
*
pSt
;
pSt
+=
cSim
.
stride
;
}
// Now calculate Born radius and OBC term.
sum
*=
0.5
f
*
atom
.
x
;
float
sum2
=
sum
*
sum
;
float
sum3
=
sum
*
sum2
;
float
tanhSum
=
tanh
(
cSim
.
alphaOBC
*
sum
-
cSim
.
betaOBC
*
sum2
+
cSim
.
gammaOBC
*
sum3
);
float
nonOffsetRadii
=
atom
.
x
+
cSim
.
dielectricOffset
;
float
bornRadius
=
1.0
f
/
(
1.0
f
/
atom
.
x
-
tanhSum
/
nonOffsetRadii
);
float
obcChain
=
atom
.
x
*
(
cSim
.
alphaOBC
-
2.0
f
*
cSim
.
betaOBC
*
sum
+
3.0
f
*
cSim
.
gammaOBC
*
sum2
);
obcChain
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
/
nonOffsetRadii
;
cSim
.
pBornRadii
[
pos
]
=
bornRadius
;
cSim
.
pObcChain
[
pos
]
=
obcChain
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceObcGbsaSoftcoreBornSum
(
gpuContext
gpu
)
{
// printf("kReduceObcGbsaSoftcoreBornSum\n");
kReduceObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
gpu
->
bRecalculateBornRadii
=
false
;
LAUNCHERROR
(
"kReduceObcGbsaSoftcoreBornSum"
);
}
/**
* Initialize parameters for Cuda Obc softcore
*
* @param freeEnergyGpu freeEnergyGpu context
* @param innerDielectric solute dielectric
* @param solventDielectric solvent dielectric
* @param radius intrinsic Born radii
* @param scale Obc scaling factors
* @param charge atomic charges (possibly overwritten by other methods?)
* @param nonPolarScalingFactors non-polar scaling factors
*
*/
extern
"C"
void
gpuSetObcSoftcoreParameters
(
freeEnergyGpuContext
freeEnergyGpu
,
float
innerDielectric
,
float
solventDielectric
,
float
nonPolarPrefactor
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
scale
,
const
std
::
vector
<
float
>&
charge
,
const
std
::
vector
<
float
>&
nonPolarScalingFactors
)
{
// ---------------------------------------------------------------------------------------
static
const
float
dielectricOffset
=
0.009
f
;
static
const
float
electricConstant
=
-
166.02691
f
;
static
const
std
::
string
methodName
=
"gpuSetObcSoftcoreParameters"
;
// ---------------------------------------------------------------------------------------
unsigned
int
numberOfParticles
=
radius
.
size
();
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
// initialize parameters
freeEnergyGpu
->
psNonPolarScalingFactors
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcSoftcoreNonPolarScaling"
);
freeEnergyGpu
->
freeEnergySim
.
pNonPolarScalingFactors
=
freeEnergyGpu
->
psNonPolarScalingFactors
->
_pDevData
;
gpu
->
sim
.
surfaceAreaFactor
=
-
6.0
f
*
PI
*
4.0
f
*
nonPolarPrefactor
;
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
for
(
unsigned
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
(
*
gpu
->
psObcData
)[
ii
].
x
=
radius
[
ii
]
-
dielectricOffset
;
(
*
gpu
->
psObcData
)[
ii
].
y
=
scale
[
ii
]
*
(
*
gpu
->
psObcData
)[
ii
].
x
;
(
*
gpu
->
psPosq4
)[
ii
].
w
=
charge
[
ii
];
(
*
gpu
->
psBornRadii
)[
ii
]
=
0.0
f
;
(
*
freeEnergyGpu
->
psNonPolarScalingFactors
)[
ii
]
=
nonPolarScalingFactors
[
ii
];
}
// diagnostics
if
(
freeEnergyGpu
->
log
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%s %u %u
\n
"
,
methodName
.
c_str
(),
gpu
->
natoms
,
gpu
->
sim
.
paddedNumberOfAtoms
);
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"surfaceAreaFactor=%15.7e preFactor=%15.7e
\n
"
,
gpu
->
sim
.
surfaceAreaFactor
,
gpu
->
sim
.
preFactor
);
#ifdef PARAMETER_PRINT
int
maxPrint
=
MAX_PARAMETER_PRINT
;
for
(
unsigned
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
(
void
)
fprintf
(
freeEnergyGpu
->
log
,
"%6u %13.6e %13.6e %8.3f %8.3f
\n
"
,
ii
,
(
*
gpu
->
psObcData
)[
ii
].
x
,
(
*
gpu
->
psObcData
)[
ii
].
y
,
(
*
gpu
->
psPosq4
)[
ii
].
w
,
(
*
freeEnergyGpu
->
psNonPolarScalingFactors
)[
ii
]
);
if
(
ii
==
maxPrint
){
ii
=
numberOfParticles
-
maxPrint
;
if
(
ii
<
maxPrint
)
ii
=
maxPrint
;
}
}
#endif
}
// dummy out extra atom data
for
(
unsigned
int
ii
=
gpu
->
natoms
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
*
gpu
->
psObcData
)[
ii
].
x
=
0.01
f
;
(
*
gpu
->
psObcData
)[
ii
].
y
=
0.01
f
;
(
*
freeEnergyGpu
->
psNonPolarScalingFactors
)[
ii
]
=
0.0
f
;
(
*
gpu
->
psBornRadii
)[
ii
]
=
0.0
f
;
}
// load data to board
gpu
->
psObcData
->
Upload
();
gpu
->
psPosq4
->
Upload
();
gpu
->
psBornRadii
->
Upload
();
freeEnergyGpu
->
psNonPolarScalingFactors
->
Upload
();
return
;
}
void
kPrintObcGbsaSoftcore
(
freeEnergyGpuContext
freeEnergyGpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
){
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
int
maxPrint
=
gpu
->
natoms
;
(
void
)
fprintf
(
log
,
"kPrintObcGbsaSoftcore %s %d
\n
"
,
callId
.
c_str
(),
call
);
gpu
->
psObcData
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psPosq4
->
Download
();
freeEnergyGpu
->
psNonPolarScalingFactors
->
Download
();
CUDAStream
<
float4
>*
sigEps4
=
freeEnergyGpu
->
psSigEps4
;
sigEps4
->
Download
();
(
void
)
fprintf
(
log
,
"BornSum Born radii & params
\n
"
);
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
//(void) fprintf( log, "%6d prm[%15.7e %15.7e %15.7e %15.7e] [%15.7e %15.7e %15.7e %15.7e] bR=%15.7e bF=%15.7e swDrv=%3.1f x[%8.3f %8.3f %8.3f %15.7f]\n",
(
void
)
fprintf
(
log
,
"%6d prm[%15.7e %15.7e %15.7e] sig/eps4[%15.7e %15.7e %15.7e %15.7e] bR=%15.7e bF=%15.7e
\n
"
,
ii
,
gpu
->
psObcData
->
_pSysData
[
ii
].
x
,
gpu
->
psObcData
->
_pSysData
[
ii
].
y
,
freeEnergyGpu
->
psNonPolarScalingFactors
->
_pSysData
[
ii
],
sigEps4
->
_pSysData
[
ii
].
x
,
sigEps4
->
_pSysData
[
ii
].
y
,
sigEps4
->
_pSysData
[
ii
].
z
,
sigEps4
->
_pSysData
[
ii
].
w
,
gpu
->
psBornRadii
->
_pSysData
[
ii
],
gpu
->
psBornForce
->
_pSysData
[
ii
]
/*
gpu->psPosq4->_pSysData[ii].x,
gpu->psPosq4->_pSysData[ii].y,
gpu->psPosq4->_pSysData[ii].z,
gpu->psPosq4->_pSysData[ii].w );
*/
);
if
(
(
ii
==
maxPrint
)
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
))
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
}
extern
__global__
void
kFindBlockBoundsCutoff_kernel
();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
void
kCalculateObcGbsaSoftcoreBornSum
(
freeEnergyGpuContext
freeEnergyGpu
)
{
// printf("kCalculateObcGbsaSoftcoreBornSum\n");
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kClearObcGbsaSoftcoreBornSum
(
gpu
);
LAUNCHERROR
(
"kClearBornSum from kCalculateObcGbsaSoftcoreBornSum"
);
switch
(
freeEnergyGpu
->
freeEnergySim
.
nonbondedMethod
)
{
case
FREE_ENERGY_NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateObcGbsaSoftcoreN2BornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
FREE_ENERGY_CUTOFF
:
kFindBlockBoundsCutoff_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsCutoff"
);
kFindBlocksWithInteractionsCutoff_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsCutoff"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaSoftcoreCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
FREE_ENERGY_PERIODIC
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaSoftcorePeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
default:
throw
OpenMM
::
OpenMMException
(
"Nonbonded softcore method not recognized."
);
}
LAUNCHERROR
(
"kCalculateObcGbsaSoftcoreBornSum"
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
deleted
100644 → 0
View file @
c775bd19
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for calculating Born sums. It is included
* several times in kCalculateObcGbsaBornSum.cu with different #defines to generate
* different versions of the kernels.
*/
#undef TARGET
//#define TARGET 1
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateObcGbsaSoftcore
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
//unsigned int x = workUnit[pos + (blockIdx.x*numWorkUnits)/gridDim.x];
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
r
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
float
polarScaleData
=
gbsaSimDev
.
pNonPolarScalingFactors
[
i
];
// scale contribution
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
sA
[
threadIdx
.
x
].
polarScaleData
=
polarScaleData
;
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#if defined USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
)
#endif
{
r
=
sqrtf
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
if
(
(
j
!=
tgx
)
&&
(
ar
.
x
<
rScaledRadiusJ
)
){
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
logf
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
j
].
sr
;
if
(
ar
.
x
<
(
rj
-
r
)
){
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
psA
[
j
].
polarScaleData
*
term
;
}
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
unsigned
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pObcData
[
j
];
float
polarScaleDataJ
=
gbsaSimDev
.
pNonPolarScalingFactors
[
j
];
// scale contribution
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
apos
.
w
=
0
.
0
f
;
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
float
polarScaleDataI
=
gbsaSimDev
.
pNonPolarScalingFactors
[
i
];
// scale contribution
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
polarScaleData
=
polarScaleDataJ
;
sA
[
threadIdx
.
x
].
sum
=
0
.
0
f
;
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
// else if (flags == 0xFFFFFFFF)
else
if
(
flags
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
){
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
)
#endif
{
r
=
sqrtf
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
logf
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
srj
=
psA
[
tj
].
sr
;
float
scale
=
psA
[
tj
].
polarScaleData
;
if
(
ar
.
x
<
(
srj
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
(
scale
*
term
);
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
tj
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
logf
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
tj
].
r
;
if
(
rj
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
tj
].
r
)
-
l_ij
);
}
psA
[
tj
].
sum
+=
polarScaleDataI
*
term
;
}
}
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
tempBuffer
[
threadIdx
.
x
]
=
0
.
0
f
;
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floorf
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floorf
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floorf
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
)
#endif
{
r
=
sqrtf
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
logf
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
srj
=
psA
[
j
].
sr
;
if
(
ar
.
x
<
(
srj
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
psA
[
j
].
polarScaleData
*
term
;
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
j
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
j
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
logf
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
j
].
r
;
if
(
rj
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
j
].
r
)
-
l_ij
);
}
tempBuffer
[
threadIdx
.
x
]
=
polarScaleDataI
*
term
;
}
}
// Sum the terms.
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
sum
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
}
}
}
#endif
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
sA
[
threadIdx
.
x
].
sum
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
sA
[
threadIdx
.
x
].
sum
;
#endif
}
pos
++
;
}
}
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment