Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
caefd490
Commit
caefd490
authored
Dec 21, 2013
by
Jason Swails
Browse files
Merge branch 'master' of
https://github.com/SimTk/openmm
parents
508f989d
0b35240d
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1673 additions
and
320 deletions
+1673
-320
examples/benchmark.py
examples/benchmark.py
+1
-1
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+255
-0
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+42
-5
platforms/cpu/include/CpuLangevinDynamics.h
platforms/cpu/include/CpuLangevinDynamics.h
+100
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-2
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+12
-17
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+89
-0
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+89
-0
platforms/cpu/include/CpuPlatform.h
platforms/cpu/include/CpuPlatform.h
+2
-0
platforms/cpu/include/CpuRandom.h
platforms/cpu/include/CpuRandom.h
+60
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+6
-2
platforms/cpu/src/CpuKernelFactory.cpp
platforms/cpu/src/CpuKernelFactory.cpp
+2
-0
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+139
-19
platforms/cpu/src/CpuLangevinDynamics.cpp
platforms/cpu/src/CpuLangevinDynamics.cpp
+126
-0
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+24
-22
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+6
-252
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+301
-0
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+332
-0
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+1
-0
platforms/cpu/src/CpuRandom.cpp
platforms/cpu/src/CpuRandom.cpp
+84
-0
No files found.
examples/benchmark.py
View file @
caefd490
...
@@ -88,7 +88,7 @@ def runOneTest(testName, options):
...
@@ -88,7 +88,7 @@ def runOneTest(testName, options):
if
platform
.
getName
()
==
'CUDA'
:
if
platform
.
getName
()
==
'CUDA'
:
properties
[
'CudaPrecision'
]
=
options
.
precision
properties
[
'CudaPrecision'
]
=
options
.
precision
elif
platform
.
getName
()
==
'OpenCL'
:
elif
platform
.
getName
()
==
'OpenCL'
:
properties
[
'OpenCLPrecision'
]
=
options
.
device
properties
[
'OpenCLPrecision'
]
=
options
.
precision
# Run the simulation.
# Run the simulation.
...
...
openmmapi/include/openmm/internal/vectorize8.h
0 → 100644
View file @
caefd490
#ifndef OPENMM_VECTORIZE8_H_
#define OPENMM_VECTORIZE8_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "vectorize.h"
#include <immintrin.h>
// This file defines classes and functions to simplify vectorizing code with AVX.
class
ivec8
;
/**
* An eight element vector of floats.
*/
class
fvec8
{
public:
__m256
val
;
fvec8
()
{}
fvec8
(
float
v
)
:
val
(
_mm256_set1_ps
(
v
))
{}
fvec8
(
float
v1
,
float
v2
,
float
v3
,
float
v4
,
float
v5
,
float
v6
,
float
v7
,
float
v8
)
:
val
(
_mm256_set_ps
(
v8
,
v7
,
v6
,
v5
,
v4
,
v3
,
v2
,
v1
))
{}
fvec8
(
__m256
v
)
:
val
(
v
)
{}
fvec8
(
const
float
*
v
)
:
val
(
_mm256_loadu_ps
(
v
))
{}
operator
__m256
()
const
{
return
val
;
}
fvec4
lowerVec
()
const
{
return
_mm256_castps256_ps128
(
val
);
}
fvec4
upperVec
()
const
{
return
_mm256_extractf128_ps
(
val
,
1
);
}
void
store
(
float
*
v
)
const
{
_mm256_storeu_ps
(
v
,
val
);
}
fvec8
operator
+
(
fvec8
other
)
const
{
return
_mm256_add_ps
(
val
,
other
);
}
fvec8
operator
-
(
fvec8
other
)
const
{
return
_mm256_sub_ps
(
val
,
other
);
}
fvec8
operator
*
(
fvec8
other
)
const
{
return
_mm256_mul_ps
(
val
,
other
);
}
fvec8
operator
/
(
fvec8
other
)
const
{
return
_mm256_div_ps
(
val
,
other
);
}
void
operator
+=
(
fvec8
other
)
{
val
=
_mm256_add_ps
(
val
,
other
);
}
void
operator
-=
(
fvec8
other
)
{
val
=
_mm256_sub_ps
(
val
,
other
);
}
void
operator
*=
(
fvec8
other
)
{
val
=
_mm256_mul_ps
(
val
,
other
);
}
void
operator
/=
(
fvec8
other
)
{
val
=
_mm256_div_ps
(
val
,
other
);
}
fvec8
operator
-
()
const
{
return
_mm256_sub_ps
(
_mm256_set1_ps
(
0.0
f
),
val
);
}
fvec8
operator
&
(
fvec8
other
)
const
{
return
_mm256_and_ps
(
val
,
other
);
}
fvec8
operator
|
(
fvec8
other
)
const
{
return
_mm256_or_ps
(
val
,
other
);
}
fvec8
operator
==
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_EQ_OQ
);
}
fvec8
operator
!=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_NEQ_OQ
);
}
fvec8
operator
>
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GT_OQ
);
}
fvec8
operator
<
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LT_OQ
);
}
fvec8
operator
>=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GE_OQ
);
}
fvec8
operator
<=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LE_OQ
);
}
operator
ivec8
()
const
;
};
/**
* An eight element vector of ints.
*/
class
ivec8
{
public:
__m256i
val
;
ivec8
()
{}
ivec8
(
int
v
)
:
val
(
_mm256_set1_epi32
(
v
))
{}
ivec8
(
int
v1
,
int
v2
,
int
v3
,
int
v4
,
int
v5
,
int
v6
,
int
v7
,
int
v8
)
:
val
(
_mm256_set_epi32
(
v8
,
v7
,
v6
,
v5
,
v4
,
v3
,
v2
,
v1
))
{}
ivec8
(
__m256i
v
)
:
val
(
v
)
{}
ivec8
(
const
int
*
v
)
:
val
(
_mm256_loadu_si256
((
const
__m256i
*
)
v
))
{}
operator
__m256i
()
const
{
return
val
;
}
ivec4
lowerVec
()
const
{
return
_mm256_castsi256_si128
(
val
);
}
ivec4
upperVec
()
const
{
return
_mm256_extractf128_si256
(
val
,
1
);
}
void
store
(
int
*
v
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
v
,
val
);
}
ivec8
operator
&
(
ivec8
other
)
const
{
return
_mm256_castps_si256
(
_mm256_and_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
}
ivec8
operator
|
(
ivec8
other
)
const
{
return
_mm256_castps_si256
(
_mm256_or_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
}
operator
fvec8
()
const
;
};
// Conversion operators.
inline
fvec8
::
operator
ivec8
()
const
{
return
_mm256_cvttps_epi32
(
val
);
}
inline
ivec8
::
operator
fvec8
()
const
{
return
_mm256_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec8s.
static
inline
fvec8
floor
(
fvec8
v
)
{
return
fvec8
(
_mm256_floor_ps
(
v
.
val
));
}
static
inline
fvec8
ceil
(
fvec8
v
)
{
return
fvec8
(
_mm256_ceil_ps
(
v
.
val
));
}
static
inline
fvec8
round
(
fvec8
v
)
{
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec8
min
(
fvec8
v1
,
fvec8
v2
)
{
return
fvec8
(
_mm256_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec8
max
(
fvec8
v1
,
fvec8
v2
)
{
return
fvec8
(
_mm256_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec8
abs
(
fvec8
v
)
{
static
const
__m256
mask
=
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
0x7FFFFFFF
));
return
fvec8
(
_mm256_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec8
sqrt
(
fvec8
v
)
{
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot8
(
fvec8
v1
,
fvec8
v2
)
{
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
}
static
inline
void
transpose
(
fvec4
in1
,
fvec4
in2
,
fvec4
in3
,
fvec4
in4
,
fvec4
in5
,
fvec4
in6
,
fvec4
in7
,
fvec4
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
_MM_TRANSPOSE4_PS
(
in1
,
in2
,
in3
,
in4
);
_MM_TRANSPOSE4_PS
(
in5
,
in6
,
in7
,
in8
);
out1
=
_mm256_castps128_ps256
(
in1
);
out1
=
_mm256_insertf128_ps
(
out1
,
in5
,
1
);
out2
=
_mm256_castps128_ps256
(
in2
);
out2
=
_mm256_insertf128_ps
(
out2
,
in6
,
1
);
out3
=
_mm256_castps128_ps256
(
in3
);
out3
=
_mm256_insertf128_ps
(
out3
,
in7
,
1
);
out4
=
_mm256_castps128_ps256
(
in4
);
out4
=
_mm256_insertf128_ps
(
out4
,
in8
,
1
);
}
static
inline
void
transpose
(
fvec8
in1
,
fvec8
in2
,
fvec8
in3
,
fvec8
in4
,
fvec4
&
out1
,
fvec4
&
out2
,
fvec4
&
out3
,
fvec4
&
out4
,
fvec4
&
out5
,
fvec4
&
out6
,
fvec4
&
out7
,
fvec4
&
out8
)
{
out1
=
in1
.
lowerVec
();
out2
=
in2
.
lowerVec
();
out3
=
in3
.
lowerVec
();
out4
=
in4
.
lowerVec
();
_MM_TRANSPOSE4_PS
(
out1
,
out2
,
out3
,
out4
);
out5
=
in1
.
upperVec
();
out6
=
in2
.
upperVec
();
out7
=
in3
.
upperVec
();
out8
=
in4
.
upperVec
();
_MM_TRANSPOSE4_PS
(
out5
,
out6
,
out7
,
out8
);
}
// Functions that operate on ivec8s.
static
inline
bool
any
(
ivec8
v
)
{
return
!
_mm256_testz_si256
(
v
,
_mm256_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec8
operator
+
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
+
v2
;
}
static
inline
fvec8
operator
-
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
-
v2
;
}
static
inline
fvec8
operator
*
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
*
v2
;
}
static
inline
fvec8
operator
/
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
/
v2
;
}
// Operations for blending fvec8s based on an ivec8.
static
inline
fvec8
blend
(
fvec8
v1
,
fvec8
v2
,
ivec8
mask
)
{
return
fvec8
(
_mm256_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm256_castsi256_ps
(
mask
.
val
)));
}
#endif
/*OPENMM_VECTORIZE8_H_*/
platforms/cpu/include/CpuKernels.h
View file @
caefd490
...
@@ -33,6 +33,7 @@
...
@@ -33,6 +33,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CpuGBSAOBCForce.h"
#include "CpuGBSAOBCForce.h"
#include "CpuLangevinDynamics.h"
#include "CpuNeighborList.h"
#include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "CpuNonbondedForce.h"
#include "CpuPlatform.h"
#include "CpuPlatform.h"
...
@@ -48,6 +49,7 @@ namespace OpenMM {
...
@@ -48,6 +49,7 @@ namespace OpenMM {
*/
*/
class
CpuCalcForcesAndEnergyKernel
:
public
CalcForcesAndEnergyKernel
{
class
CpuCalcForcesAndEnergyKernel
:
public
CalcForcesAndEnergyKernel
{
public:
public:
class
SumForceTask
;
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
);
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
);
/**
/**
* Initialize the kernel.
* Initialize the kernel.
...
@@ -88,9 +90,7 @@ private:
...
@@ -88,9 +90,7 @@ private:
*/
*/
class
CpuCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CpuCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
);
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
)
{
}
~
CpuCalcNonbondedForceKernel
();
~
CpuCalcNonbondedForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
...
@@ -130,8 +130,8 @@ private:
...
@@ -130,8 +130,8 @@ private:
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
RealVec
>
lastPositions
;
std
::
vector
<
RealVec
>
lastPositions
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNeighborList
*
neighborList
;
CpuNonbondedForce
nonbonded
;
CpuNonbondedForce
*
nonbonded
;
Kernel
optimizedPme
;
Kernel
optimizedPme
;
};
};
...
@@ -173,6 +173,43 @@ private:
...
@@ -173,6 +173,43 @@ private:
CpuGBSAOBCForce
obc
;
CpuGBSAOBCForce
obc
;
};
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
*/
class
CpuIntegrateLangevinStepKernel
:
public
IntegrateLangevinStepKernel
{
public:
CpuIntegrateLangevinStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
IntegrateLangevinStepKernel
(
name
,
platform
),
data
(
data
),
dynamics
(
NULL
)
{
}
~
CpuIntegrateLangevinStepKernel
();
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param integrator the LangevinIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the LangevinIntegrator this kernel is being used for
*/
void
execute
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the LangevinIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
);
private:
CpuPlatform
::
PlatformData
&
data
;
CpuLangevinDynamics
*
dynamics
;
std
::
vector
<
RealOpenMM
>
masses
;
double
prevTemp
,
prevFriction
,
prevStepSize
;
};
}
// namespace OpenMM
}
// namespace OpenMM
#endif
/*OPENMM_CPUKERNELS_H_*/
#endif
/*OPENMM_CPUKERNELS_H_*/
...
...
platforms/cpu/include/CpuLangevinDynamics.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __CPU_LANGEVIN_DYNAMICS_H__
#define __CPU_LANGEVIN_DYNAMICS_H__
#include "ReferenceStochasticDynamics.h"
#include "CpuRandom.h"
#include "openmm/internal/ThreadPool.h"
#include "sfmt/SFMT.h"
// ---------------------------------------------------------------------------------------
class
CpuLangevinDynamics
:
public
ReferenceStochasticDynamics
{
public:
class
Update1Task
;
class
Update2Task
;
/**
* Constructor.
*
* @param numberOfAtoms number of atoms
* @param deltaT delta t for dynamics
* @param tau viscosity
* @param temperature temperature
* @param threads thread pool for parallelizing computation
* @param random random number generator
*/
CpuLangevinDynamics
(
int
numberOfAtoms
,
RealOpenMM
deltaT
,
RealOpenMM
tau
,
RealOpenMM
temperature
,
OpenMM
::
ThreadPool
&
threads
,
OpenMM
::
CpuRandom
&
random
);
/**
* Destructor.
*/
~
CpuLangevinDynamics
();
/**
* First update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void
updatePart1
(
int
numberOfAtoms
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
velocities
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
std
::
vector
<
OpenMM
::
RealVec
>&
xPrime
);
/**
* Second update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void
updatePart2
(
int
numberOfAtoms
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
velocities
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
std
::
vector
<
OpenMM
::
RealVec
>&
xPrime
);
private:
void
threadUpdate1
(
int
threadIndex
);
void
threadUpdate2
(
int
threadIndex
);
OpenMM
::
ThreadPool
&
threads
;
OpenMM
::
CpuRandom
&
random
;
std
::
vector
<
OpenMM_SFMT
::
SFMT
>
threadRandom
;
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
OpenMM
::
RealVec
*
atomCoordinates
;
OpenMM
::
RealVec
*
velocities
;
OpenMM
::
RealVec
*
forces
;
RealOpenMM
*
inverseMasses
;
OpenMM
::
RealVec
*
xPrime
;
};
// ---------------------------------------------------------------------------------------
#endif // __CPU_LANGEVIN_DYNAMICS_H__
platforms/cpu/include/CpuNeighborList.h
View file @
caefd490
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
public:
public:
class
ThreadTask
;
class
ThreadTask
;
class
Voxels
;
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
(
int
blockSize
);
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
int
getNumBlocks
()
const
;
...
@@ -59,6 +58,7 @@ public:
...
@@ -59,6 +58,7 @@ public:
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
runThread
(
int
index
);
void
runThread
(
int
index
);
private:
private:
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
caefd490
...
@@ -48,7 +48,13 @@ class CpuNonbondedForce {
...
@@ -48,7 +48,13 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
();
CpuNonbondedForce
();
/**
* Virtual destructor.
*/
virtual
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
...
@@ -151,7 +157,7 @@ class CpuNonbondedForce {
...
@@ -151,7 +157,7 @@ class CpuNonbondedForce {
*/
*/
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
pr
ivate
:
pr
otected
:
bool
cutoff
;
bool
cutoff
;
bool
useSwitch
;
bool
useSwitch
;
bool
periodic
;
bool
periodic
;
...
@@ -204,7 +210,7 @@ private:
...
@@ -204,7 +210,7 @@ private:
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -216,7 +222,7 @@ private:
...
@@ -216,7 +222,7 @@ private:
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**
/**
* Compute the displacement and squared distance between two points, optionally using
* Compute the displacement and squared distance between two points, optionally using
...
@@ -224,26 +230,15 @@ private:
...
@@ -224,26 +230,15 @@ private:
*/
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec4
erfcApprox
(
fvec4
x
);
/**
/**
* Create a lookup table for the scale factor used with Ewald and PME.
* Create a lookup table for the scale factor used with Ewald and PME.
*/
*/
void
tabulateEwaldScaleFactor
();
void
tabulateEwaldScaleFactor
();
/**
/**
*
Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*
Compute a fast approximation to erfc(x).
*/
*/
fvec4
ewaldScaleFunction
(
fvec4
x
);
static
float
erfcApprox
(
float
x
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#include "CpuNonbondedForce.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForceVec4
:
public
CpuNonbondedForce
{
public:
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec4
();
protected:
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec4
erfcApprox
(
fvec4
x
);
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec4
ewaldScaleFunction
(
fvec4
x
);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
platforms/cpu/include/CpuNonbondedForceVec8.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#ifdef __AVX__
#include "CpuNonbondedForce.h"
#include "openmm/internal/vectorize8.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForceVec8
:
public
CpuNonbondedForce
{
public:
CpuNonbondedForceVec8
();
protected:
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec8
erfcApprox
(
fvec8
x
);
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec8
ewaldScaleFunction
(
fvec8
x
);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // __AVX__
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
platforms/cpu/include/CpuPlatform.h
View file @
caefd490
...
@@ -33,6 +33,7 @@
...
@@ -33,6 +33,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "AlignedArray.h"
#include "AlignedArray.h"
#include "CpuRandom.h"
#include "ReferencePlatform.h"
#include "ReferencePlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/ThreadPool.h"
...
@@ -74,6 +75,7 @@ public:
...
@@ -74,6 +75,7 @@ public:
std
::
vector
<
AlignedArray
<
float
>
>
threadForce
;
std
::
vector
<
AlignedArray
<
float
>
>
threadForce
;
ThreadPool
threads
;
ThreadPool
threads
;
bool
isPeriodic
;
bool
isPeriodic
;
CpuRandom
random
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuRandom.h
0 → 100644
View file @
caefd490
#ifndef OPENMM_CPURANDOM_H_
#define OPENMM_CPURANDOM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "sfmt/SFMT.h"
#include <vector>
namespace
OpenMM
{
/**
* This class provides a multithreaded random number generator.
*/
class
OPENMM_EXPORT
CpuRandom
{
public:
CpuRandom
();
~
CpuRandom
();
void
initialize
(
int
seed
,
int
numThreads
);
float
getGaussianRandom
(
int
threadIndex
);
float
getUniformRandom
(
int
threadIndex
);
private:
bool
hasInitialized
;
int
randomSeed
;
std
::
vector
<
OpenMM_SFMT
::
SFMT
*>
threadRandom
;
std
::
vector
<
float
>
nextGaussian
;
std
::
vector
<
int
>
nextGaussianIsValid
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CPURANDOM_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
caefd490
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
@@ -7,6 +11,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...
@@ -7,6 +11,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cpu/src/CpuKernelFactory.cpp
View file @
caefd490
...
@@ -45,5 +45,7 @@ KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform&
...
@@ -45,5 +45,7 @@ KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform&
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
,
data
);
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
CpuCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
return
new
CpuCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
return
new
CpuIntegrateLangevinStepKernel
(
name
,
platform
,
data
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
}
platforms/cpu/src/CpuKernels.cpp
View file @
caefd490
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include "CpuKernels.h"
#include "CpuKernels.h"
#include "ReferenceBondForce.h"
#include "ReferenceBondForce.h"
#include "ReferenceConstraints.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernels.h"
#include "ReferenceKernels.h"
#include "ReferenceLJCoulomb14.h"
#include "ReferenceLJCoulomb14.h"
...
@@ -64,6 +65,71 @@ static RealVec& extractBoxSize(ContextImpl& context) {
...
@@ -64,6 +65,71 @@ static RealVec& extractBoxSize(ContextImpl& context) {
return
*
(
RealVec
*
)
data
->
periodicBoxSize
;
return
*
(
RealVec
*
)
data
->
periodicBoxSize
;
}
}
static
ReferenceConstraints
&
extractConstraints
(
ContextImpl
&
context
)
{
ReferencePlatform
::
PlatformData
*
data
=
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
return
*
(
ReferenceConstraints
*
)
data
->
constraints
;
}
/**
* Compute the kinetic energy of the system, possibly shifting the velocities in time to account
* for a leapfrog integrator.
*/
static
double
computeShiftedKineticEnergy
(
ContextImpl
&
context
,
vector
<
double
>&
masses
,
double
timeShift
)
{
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
velData
=
extractVelocities
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
// Compute the shifted velocities.
vector
<
RealVec
>
shiftedVel
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
if
(
masses
[
i
]
>
0
)
shiftedVel
[
i
]
=
velData
[
i
]
+
forceData
[
i
]
*
(
timeShift
/
masses
[
i
]);
else
shiftedVel
[
i
]
=
velData
[
i
];
}
// Apply constraints to them.
vector
<
double
>
inverseMasses
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
inverseMasses
[
i
]
=
(
masses
[
i
]
==
0
?
0
:
1
/
masses
[
i
]);
extractConstraints
(
context
).
applyToVelocities
(
posData
,
shiftedVel
,
inverseMasses
,
1e-4
);
// Compute the kinetic energy.
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
if
(
masses
[
i
]
>
0
)
energy
+=
masses
[
i
]
*
(
shiftedVel
[
i
].
dot
(
shiftedVel
[
i
]));
return
0.5
*
energy
;
}
class
CpuCalcForcesAndEnergyKernel
::
SumForceTask
:
public
ThreadPool
::
Task
{
public:
SumForceTask
(
int
numParticles
,
vector
<
RealVec
>&
forceData
,
CpuPlatform
::
PlatformData
&
data
)
:
numParticles
(
numParticles
),
forceData
(
forceData
),
data
(
data
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Sum the contributions to forces that have been calculated by different threads.
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
}
int
numParticles
;
vector
<
RealVec
>&
forceData
;
CpuPlatform
::
PlatformData
&
data
;
};
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
// Create a Reference platform version of this kernel.
// Create a Reference platform version of this kernel.
...
@@ -111,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
...
@@ -111,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
// Sum the forces from all the threads.
// Sum the forces from all the threads.
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
SumForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
extractForces
(
context
),
data
);
int
numThreads
=
data
.
threads
.
getNumThreads
();
data
.
threads
.
execute
(
task
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
data
.
threads
.
waitForThreads
();
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
}
...
@@ -145,6 +203,22 @@ private:
...
@@ -145,6 +203,22 @@ private:
int
numParticles
;
int
numParticles
;
};
};
bool
isVec8Supported
();
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
neighborList
(
NULL
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
{
neighborList
=
new
CpuNeighborList
(
8
);
nonbonded
=
createCpuNonbondedForceVec8
();
}
else
{
neighborList
=
new
CpuNeighborList
(
4
);
nonbonded
=
createCpuNonbondedForceVec4
();
}
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
if
(
bonded14ParamArray
!=
NULL
)
{
if
(
bonded14ParamArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
...
@@ -154,6 +228,10 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
...
@@ -154,6 +228,10 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
delete
bonded14IndexArray
;
delete
bonded14IndexArray
;
delete
bonded14ParamArray
;
delete
bonded14ParamArray
;
}
}
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
}
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
@@ -305,26 +383,26 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -305,26 +383,26 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
}
if
(
needRecompute
)
{
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
lastPositions
=
posData
;
lastPositions
=
posData
;
}
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
}
if
(
data
.
isPeriodic
)
{
if
(
data
.
isPeriodic
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
nonbonded
->
setPeriodic
(
floatBoxSize
);
}
}
if
(
ewald
)
if
(
ewald
)
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
->
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
if
(
pme
)
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
if
(
useSwitchingFunction
)
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
double
nonbondedEnergy
=
0
;
double
nonbondedEnergy
=
0
;
if
(
includeDirect
)
if
(
includeDirect
)
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
if
(
includeReciprocal
)
{
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
...
@@ -333,7 +411,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -333,7 +411,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
}
else
else
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
}
energy
+=
nonbondedEnergy
;
energy
+=
nonbondedEnergy
;
if
(
includeDirect
)
{
if
(
includeDirect
)
{
...
@@ -440,3 +518,45 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
...
@@ -440,3 +518,45 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
}
}
obc
.
setParticleParameters
(
particleParams
);
obc
.
setParticleParameters
(
particleParams
);
}
}
CpuIntegrateLangevinStepKernel
::~
CpuIntegrateLangevinStepKernel
()
{
if
(
dynamics
)
delete
dynamics
;
}
void
CpuIntegrateLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
)
{
int
numParticles
=
system
.
getNumParticles
();
masses
.
resize
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
masses
[
i
]
=
static_cast
<
RealOpenMM
>
(
system
.
getParticleMass
(
i
));
data
.
random
.
initialize
(
integrator
.
getRandomNumberSeed
(),
data
.
threads
.
getNumThreads
());
}
void
CpuIntegrateLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
double
temperature
=
integrator
.
getTemperature
();
double
friction
=
integrator
.
getFriction
();
double
stepSize
=
integrator
.
getStepSize
();
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
velData
=
extractVelocities
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
if
(
dynamics
==
0
||
temperature
!=
prevTemp
||
friction
!=
prevFriction
||
stepSize
!=
prevStepSize
)
{
// Recreate the computation objects with the new parameters.
if
(
dynamics
)
delete
dynamics
;
RealOpenMM
tau
=
(
friction
==
0.0
?
0.0
:
1.0
/
friction
);
dynamics
=
new
CpuLangevinDynamics
(
context
.
getSystem
().
getNumParticles
(),
stepSize
,
tau
,
temperature
,
data
.
threads
,
data
.
random
);
dynamics
->
setReferenceConstraintAlgorithm
(
&
extractConstraints
(
context
));
prevTemp
=
temperature
;
prevFriction
=
friction
;
prevStepSize
=
stepSize
;
}
dynamics
->
update
(
context
.
getSystem
(),
posData
,
velData
,
forceData
,
masses
,
integrator
.
getConstraintTolerance
());
ReferencePlatform
::
PlatformData
*
refData
=
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
refData
->
time
+=
stepSize
;
refData
->
stepCount
++
;
}
double
CpuIntegrateLangevinStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
return
computeShiftedKineticEnergy
(
context
,
masses
,
0.5
*
integrator
.
getStepSize
());
}
platforms/cpu/src/CpuLangevinDynamics.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuLangevinDynamics.h"
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuLangevinDynamics
::
Update1Task
:
public
ThreadPool
::
Task
{
public:
Update1Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate1
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
class
CpuLangevinDynamics
::
Update2Task
:
public
ThreadPool
::
Task
{
public:
Update2Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate2
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
CpuLangevinDynamics
::
CpuLangevinDynamics
(
int
numberOfAtoms
,
RealOpenMM
deltaT
,
RealOpenMM
tau
,
RealOpenMM
temperature
,
ThreadPool
&
threads
,
CpuRandom
&
random
)
:
ReferenceStochasticDynamics
(
numberOfAtoms
,
deltaT
,
tau
,
temperature
),
threads
(
threads
),
random
(
random
)
{
}
CpuLangevinDynamics
::~
CpuLangevinDynamics
()
{
}
void
CpuLangevinDynamics
::
updatePart1
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update1Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
updatePart2
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update2Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
threadUpdate1
(
int
threadIndex
)
{
const
RealOpenMM
tau
=
getTau
();
const
RealOpenMM
vscale
=
EXP
(
-
getDeltaT
()
/
tau
);
const
RealOpenMM
fscale
=
(
1
-
vscale
)
*
tau
;
const
RealOpenMM
kT
=
BOLTZ
*
getTemperature
();
const
RealOpenMM
noisescale
=
SQRT
(
2
*
kT
/
tau
)
*
SQRT
(
0.5
*
(
1
-
vscale
*
vscale
)
*
tau
);
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
RealVec
noise
(
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
));
velocities
[
i
]
=
velocities
[
i
]
*
vscale
+
forces
[
i
]
*
(
fscale
*
inverseMasses
[
i
])
+
noise
*
(
noisescale
*
sqrtInvMass
);
}
}
}
void
CpuLangevinDynamics
::
threadUpdate2
(
int
threadIndex
)
{
const
RealOpenMM
dt
=
getDeltaT
();
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
xPrime
[
i
]
=
atomCoordinates
[
i
]
+
velocities
[
i
]
*
dt
;
}
}
}
platforms/cpu/src/CpuNeighborList.cpp
View file @
caefd490
...
@@ -43,8 +43,6 @@ using namespace std;
...
@@ -43,8 +43,6 @@ using namespace std;
namespace
OpenMM
{
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
class
VoxelIndex
{
{
public:
public:
...
@@ -62,8 +60,8 @@ public:
...
@@ -62,8 +60,8 @@ public:
*/
*/
class
CpuNeighborList
::
Voxels
{
class
CpuNeighborList
::
Voxels
{
public:
public:
Voxels
(
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
Voxels
(
int
blockSize
,
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
blockSize
(
blockSize
),
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
...
@@ -156,7 +154,7 @@ public:
...
@@ -156,7 +154,7 @@ public:
return
VoxelIndex
(
x
,
y
);
return
VoxelIndex
(
x
,
y
);
}
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
&
blockAtoms
,
const
float
*
atomLocations
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
@@ -175,9 +173,6 @@ public:
...
@@ -175,9 +173,6 @@ public:
float
centerPos
[
4
];
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
atomVoxelIndex
[
BlockSize
];
for
(
int
i
=
0
;
i
<
(
int
)
blockAtoms
.
size
();
i
++
)
atomVoxelIndex
[
i
]
=
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
i
]]);
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
...
@@ -193,7 +188,7 @@ public:
...
@@ -193,7 +188,7 @@ public:
endx
=
min
(
endx
,
nx
-
1
);
endx
=
min
(
endx
,
nx
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
}
}
int
lastSortedIndex
=
B
lockSize
*
(
blockIndex
+
1
);
int
lastSortedIndex
=
b
lockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
);
VoxelIndex
voxelIndex
(
0
,
0
);
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
voxelIndex
.
x
=
x
;
voxelIndex
.
x
=
x
;
...
@@ -300,10 +295,12 @@ public:
...
@@ -300,10 +295,12 @@ public:
// Add this atom to the list of neighbors.
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
B
lockSize
*
blockIndex
)
if
(
sortedIndex
<
b
lockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
0
);
else
else
{
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
int
mask
=
(
1
<<
blockSize
)
-
1
;
exclusions
.
push_back
(
mask
&
(
mask
<<
(
sortedIndex
-
blockSize
*
blockIndex
)));
}
}
}
}
}
}
}
...
@@ -311,6 +308,7 @@ public:
...
@@ -311,6 +308,7 @@ public:
}
}
private:
private:
int
blockSize
;
float
voxelSizeX
,
voxelSizeY
;
float
voxelSizeX
,
voxelSizeY
;
float
minx
,
maxx
,
miny
,
maxy
;
float
minx
,
maxx
,
miny
,
maxy
;
int
nx
,
ny
;
int
nx
,
ny
;
...
@@ -329,12 +327,12 @@ public:
...
@@ -329,12 +327,12 @@ public:
CpuNeighborList
&
owner
;
CpuNeighborList
&
owner
;
};
};
CpuNeighborList
::
CpuNeighborList
()
{
CpuNeighborList
::
CpuNeighborList
(
int
blockSize
)
:
blockSize
(
blockSize
)
{
}
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
B
lockSize
-
1
)
/
B
lockSize
;
int
numBlocks
=
(
numAtoms
+
b
lockSize
-
1
)
/
b
lockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedAtoms
.
resize
(
numAtoms
);
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
Voxels
voxels
(
blockSize
,
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
sortedAtoms
[
i
]
=
atomIndex
;
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Add padding atoms to fill up the last block.
// Add padding atoms to fill up the last block.
int
numPadding
=
numBlocks
*
B
lockSize
-
numAtoms
;
int
numPadding
=
numBlocks
*
b
lockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
if
(
numPadding
>
0
)
{
char
mask
=
(
0xF
0
>>
numPadding
)
&
0xF
;
char
mask
=
(
(
0xF
FFF
-
(
1
<<
blockSize
)
+
1
)
>>
numPadding
);
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
}
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
B
lockSize
;
return
sortedAtoms
.
size
()
/
b
lockSize
;
}
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
int
>
blockAtoms
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
// Find the atoms in this block and compute their bounding box.
int
firstIndex
=
B
lockSize
*
i
;
int
firstIndex
=
b
lockSize
*
i
;
int
atomsInBlock
=
min
(
B
lockSize
,
numAtoms
-
firstIndex
);
int
atomsInBlock
=
min
(
b
lockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
atomVoxelIndex
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
minPos
=
min
(
minPos
,
pos
);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
// Record the exclusions for this block.
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
caefd490
...
@@ -29,7 +29,6 @@
...
@@ -29,7 +29,6 @@
#include "CpuNonbondedForce.h"
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "ReferencePME.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
// In case we're using some primitive version of Visual Studio this will
...
@@ -61,6 +60,9 @@ public:
...
@@ -61,6 +60,9 @@ public:
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
}
}
CpuNonbondedForce
::~
CpuNonbondedForce
()
{
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
...
@@ -356,7 +358,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -356,7 +358,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float
inverseR
=
1
/
r
;
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)
[
0
]
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
fvec4
result
=
deltaR
*
dEdR
;
...
@@ -446,228 +448,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
...
@@ -446,228 +448,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
}
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
false
;
if
(
periodic
)
{
for
(
int
i
=
0
;
i
<
4
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
false
;
for
(
int
i
=
0
;
i
<
4
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
if
(
periodic
)
{
...
@@ -677,41 +457,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
...
@@ -677,41 +457,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
float
CpuNonbondedForce
::
erfcApprox
(
float
x
)
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForce
::
erfcApprox
(
fvec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
// error of 3e-7.
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
return
1.0
f
/
(
t
*
t
);
}
}
fvec4
CpuNonbondedForce
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
using
namespace
std
;
using
namespace
OpenMM
;
/**
* Factory method to create a CpuNonbondedForceVec4.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec4
()
{
return
new
CpuNonbondedForceVec4
();
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec4 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForceVec4
::
erfcApprox
(
fvec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec4
CpuNonbondedForceVec4
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
using
namespace
OpenMM
;
#ifndef __AVX__
bool
isVec8Supported
()
{
return
false
;
}
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
throw
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
}
#else
/**
* Check whether 8 component vectors are supported with the current CPU.
*/
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
28
))
!=
0
);
}
return
false
;
}
/**
* Factory method to create a CpuNonbondedForceVec8.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
return
new
CpuNonbondedForceVec8
();
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec8 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec8
CpuNonbondedForceVec8
::
erfcApprox
(
fvec8
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec8
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec8
CpuNonbondedForceVec8
::
ewaldScaleFunction
(
fvec8
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec8
x1
=
x
*
ewaldDXInv
;
ivec8
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
x1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
ewaldScaleTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
ewaldScaleTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
ewaldScaleTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
ewaldScaleTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
ewaldScaleTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
platforms/cpu/src/CpuPlatform.cpp
View file @
caefd490
...
@@ -53,6 +53,7 @@ CpuPlatform::CpuPlatform() {
...
@@ -53,6 +53,7 @@ CpuPlatform::CpuPlatform() {
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
}
}
double
CpuPlatform
::
getSpeed
()
const
{
double
CpuPlatform
::
getSpeed
()
const
{
...
...
platforms/cpu/src/CpuRandom.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuRandom.h"
#include "openmm/OpenMMException.h"
#include <cmath>
using
namespace
std
;
using
namespace
OpenMM
;
CpuRandom
::
CpuRandom
()
:
hasInitialized
(
false
)
{
}
CpuRandom
::~
CpuRandom
()
{
for
(
int
i
=
0
;
i
<
threadRandom
.
size
();
i
++
)
delete
threadRandom
[
i
];
}
void
CpuRandom
::
initialize
(
int
seed
,
int
numThreads
)
{
if
(
hasInitialized
)
{
if
(
seed
==
randomSeed
)
return
;
// Already initialized with the same seed.
throw
OpenMMException
(
"Random number generator initialized twice with different seeds"
);
}
randomSeed
=
seed
;
hasInitialized
=
true
;
threadRandom
.
resize
(
numThreads
);
nextGaussian
.
resize
(
numThreads
);
nextGaussianIsValid
.
resize
(
numThreads
,
false
);
// Use a quick and dirty RNG to pick seeds for the real random number generator.
unsigned
int
r
=
(
unsigned
int
)
seed
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
threadRandom
[
i
]
=
new
OpenMM_SFMT
::
SFMT
();
init_gen_rand
(
r
,
*
threadRandom
[
i
]);
}
}
float
CpuRandom
::
getGaussianRandom
(
int
threadIndex
)
{
if
(
nextGaussianIsValid
[
threadIndex
])
{
nextGaussianIsValid
[
threadIndex
]
=
false
;
return
nextGaussian
[
threadIndex
];
}
// Use the polar form of the Box-Muller transformation to generate two Gaussian random numbers.
float
x
,
y
,
r2
;
do
{
x
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
y
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
r2
=
x
*
x
+
y
*
y
;
}
while
(
r2
>=
1.0
f
||
r2
==
0.0
f
);
float
multiplier
=
sqrtf
((
-
2.0
f
*
logf
(
r2
))
/
r2
);
nextGaussian
[
threadIndex
]
=
y
*
multiplier
;
nextGaussianIsValid
[
threadIndex
]
=
true
;
return
x
*
multiplier
;
}
float
CpuRandom
::
getUniformRandom
(
int
threadIndex
)
{
return
genrand_real2
(
*
threadRandom
[
threadIndex
]);
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment