Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
caefd490
"csrc/vscode:/vscode.git/clone" did not exist on "dd5d4bb3c0e1873291595d760ff14a071cc6c3cb"
Commit
caefd490
authored
Dec 21, 2013
by
Jason Swails
Browse files
Merge branch 'master' of
https://github.com/SimTk/openmm
parents
508f989d
0b35240d
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1673 additions
and
320 deletions
+1673
-320
examples/benchmark.py
examples/benchmark.py
+1
-1
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+255
-0
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+42
-5
platforms/cpu/include/CpuLangevinDynamics.h
platforms/cpu/include/CpuLangevinDynamics.h
+100
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-2
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+12
-17
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+89
-0
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+89
-0
platforms/cpu/include/CpuPlatform.h
platforms/cpu/include/CpuPlatform.h
+2
-0
platforms/cpu/include/CpuRandom.h
platforms/cpu/include/CpuRandom.h
+60
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+6
-2
platforms/cpu/src/CpuKernelFactory.cpp
platforms/cpu/src/CpuKernelFactory.cpp
+2
-0
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+139
-19
platforms/cpu/src/CpuLangevinDynamics.cpp
platforms/cpu/src/CpuLangevinDynamics.cpp
+126
-0
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+24
-22
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+6
-252
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+301
-0
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+332
-0
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+1
-0
platforms/cpu/src/CpuRandom.cpp
platforms/cpu/src/CpuRandom.cpp
+84
-0
No files found.
examples/benchmark.py
View file @
caefd490
...
@@ -88,7 +88,7 @@ def runOneTest(testName, options):
...
@@ -88,7 +88,7 @@ def runOneTest(testName, options):
if
platform
.
getName
()
==
'CUDA'
:
if
platform
.
getName
()
==
'CUDA'
:
properties
[
'CudaPrecision'
]
=
options
.
precision
properties
[
'CudaPrecision'
]
=
options
.
precision
elif
platform
.
getName
()
==
'OpenCL'
:
elif
platform
.
getName
()
==
'OpenCL'
:
properties
[
'OpenCLPrecision'
]
=
options
.
device
properties
[
'OpenCLPrecision'
]
=
options
.
precision
# Run the simulation.
# Run the simulation.
...
...
openmmapi/include/openmm/internal/vectorize8.h
0 → 100644
View file @
caefd490
#ifndef OPENMM_VECTORIZE8_H_
#define OPENMM_VECTORIZE8_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "vectorize.h"
#include <immintrin.h>
// This file defines classes and functions to simplify vectorizing code with AVX.
class
ivec8
;
/**
 * An eight element vector of floats, stored in a single AVX __m256 value.
 *
 * The single-argument constructors are intentionally implicit so that raw
 * intrinsic results and scalars convert to fvec8 without extra syntax.
 *
 * Comparison operators return a per-element mask (all bits set where the
 * comparison holds, all bits clear where it does not), not a bool.
 */
class fvec8 {
public:
    __m256 val;

    /** Leave the contents uninitialized. */
    fvec8() {}
    /** Broadcast a single value to all eight elements. */
    fvec8(float v) : val(_mm256_set1_ps(v)) {}
    /** Build a vector from eight values; v1 becomes element 0 and v8 element 7. */
    fvec8(float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8) :
            val(_mm256_set_ps(v8, v7, v6, v5, v4, v3, v2, v1)) {}
    /** Wrap a raw AVX value. */
    fvec8(__m256 v) : val(v) {}
    /** Load eight consecutive floats from memory (no alignment requirement). */
    fvec8(const float* v) : val(_mm256_loadu_ps(v)) {}

    /** Expose the raw AVX value so an fvec8 can be passed directly to intrinsics. */
    operator __m256() const {
        return val;
    }
    /** Elements 0-3 as a four element vector. */
    fvec4 lowerVec() const {
        return _mm256_castps256_ps128(val);
    }
    /** Elements 4-7 as a four element vector. */
    fvec4 upperVec() const {
        return _mm256_extractf128_ps(val, 1);
    }
    /** Write the eight elements to memory (no alignment requirement). */
    void store(float* v) const {
        _mm256_storeu_ps(v, val);
    }

    // Element-wise arithmetic.
    fvec8 operator+(fvec8 other) const {
        return _mm256_add_ps(val, other);
    }
    fvec8 operator-(fvec8 other) const {
        return _mm256_sub_ps(val, other);
    }
    fvec8 operator*(fvec8 other) const {
        return _mm256_mul_ps(val, other);
    }
    fvec8 operator/(fvec8 other) const {
        return _mm256_div_ps(val, other);
    }
    void operator+=(fvec8 other) {
        val = _mm256_add_ps(val, other);
    }
    void operator-=(fvec8 other) {
        val = _mm256_sub_ps(val, other);
    }
    void operator*=(fvec8 other) {
        val = _mm256_mul_ps(val, other);
    }
    void operator/=(fvec8 other) {
        val = _mm256_div_ps(val, other);
    }
    /**
     * Negate every element by flipping its sign bit.  Computing 0 - x instead
     * would turn +0 into +0 rather than -0, which is not a true negation.
     */
    fvec8 operator-() const {
        return _mm256_xor_ps(val, _mm256_set1_ps(-0.0f));
    }

    // Bitwise operations, typically applied to comparison masks.
    fvec8 operator&(fvec8 other) const {
        return _mm256_and_ps(val, other);
    }
    fvec8 operator|(fvec8 other) const {
        return _mm256_or_ps(val, other);
    }

    // Element-wise comparisons producing masks.  All are non-signaling.  The
    // ordered predicates report false for an element if either operand is NaN.
    fvec8 operator==(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_EQ_OQ);
    }
    /**
     * Uses the unordered predicate so that an element compares "not equal" when
     * either operand is NaN, matching scalar != and keeping (a != b) the exact
     * complement of (a == b).
     */
    fvec8 operator!=(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_NEQ_UQ);
    }
    fvec8 operator>(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_GT_OQ);
    }
    fvec8 operator<(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_LT_OQ);
    }
    fvec8 operator>=(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_GE_OQ);
    }
    fvec8 operator<=(fvec8 other) const {
        return _mm256_cmp_ps(val, other, _CMP_LE_OQ);
    }

    /** Convert to integers; defined after ivec8 is complete. */
    operator ivec8() const;
};
/**
 * Eight 32 bit integers packed into a single __m256i value.
 */
class ivec8 {
public:
    __m256i val;

    /** Contents are left uninitialized. */
    ivec8() {}
    /** Replicate one integer into every element. */
    ivec8(int v) : val(_mm256_set1_epi32(v)) {}
    /** Element-by-element construction; v1 lands in element 0, v8 in element 7. */
    ivec8(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8) :
            val(_mm256_set_epi32(v8, v7, v6, v5, v4, v3, v2, v1)) {}
    /** Adopt a raw AVX integer value. */
    ivec8(__m256i v) : val(v) {}
    /** Read eight consecutive ints from memory using an unaligned load. */
    ivec8(const int* v) : val(_mm256_loadu_si256((const __m256i*) v)) {}

    /** Implicit access to the underlying value for use with intrinsics. */
    operator __m256i() const {
        return val;
    }

    /** The four low elements (0-3). */
    ivec4 lowerVec() const {
        return _mm256_castsi256_si128(val);
    }

    /** The four high elements (4-7). */
    ivec4 upperVec() const {
        return _mm256_extractf128_si256(val, 1);
    }

    /** Write all eight elements to memory using an unaligned store. */
    void store(int* v) const {
        _mm256_storeu_si256((__m256i*) v, val);
    }

    /**
     * Bitwise AND.  The bits are reinterpreted as floats so the float AND
     * instruction can be used; no numeric conversion takes place.
     * NOTE(review): presumably done because plain AVX has no 256 bit integer
     * logic instructions - confirm.
     */
    ivec8 operator&(ivec8 other) const {
        const __m256 lhsBits = _mm256_castsi256_ps(val);
        const __m256 rhsBits = _mm256_castsi256_ps(other.val);
        return _mm256_castps_si256(_mm256_and_ps(lhsBits, rhsBits));
    }

    /** Bitwise OR, implemented through the same float reinterpretation as operator&. */
    ivec8 operator|(ivec8 other) const {
        const __m256 lhsBits = _mm256_castsi256_ps(val);
        const __m256 rhsBits = _mm256_castsi256_ps(other.val);
        return _mm256_castps_si256(_mm256_or_ps(lhsBits, rhsBits));
    }

    /** Numeric conversion to floats; defined out of line below. */
    operator fvec8() const;
};
// Conversion operators.
/** Convert each float to an int, truncating toward zero. */
inline fvec8::operator ivec8() const {
    const __m256i truncated = _mm256_cvttps_epi32(val);
    return ivec8(truncated);
}
/** Convert each int to the corresponding float value. */
inline ivec8::operator fvec8() const {
    const __m256 asFloats = _mm256_cvtepi32_ps(val);
    return fvec8(asFloats);
}
// Functions that operate on fvec8s.
/** Round every element down to the nearest integer value. */
static inline fvec8 floor(fvec8 v) {
    const __m256 roundedDown = _mm256_floor_ps(v.val);
    return fvec8(roundedDown);
}
/** Round every element up to the nearest integer value. */
static inline fvec8 ceil(fvec8 v) {
    const __m256 roundedUp = _mm256_ceil_ps(v.val);
    return fvec8(roundedUp);
}
/** Round every element to the nearest integer value. */
static inline fvec8 round(fvec8 v) {
    const __m256 nearest = _mm256_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT);
    return fvec8(nearest);
}
/** Element-wise minimum of two vectors. */
static inline fvec8 min(fvec8 v1, fvec8 v2) {
    const __m256 smaller = _mm256_min_ps(v1.val, v2.val);
    return fvec8(smaller);
}
/** Element-wise maximum of two vectors. */
static inline fvec8 max(fvec8 v1, fvec8 v2) {
    const __m256 larger = _mm256_max_ps(v1.val, v2.val);
    return fvec8(larger);
}
/**
 * Element-wise absolute value, computed by clearing the sign bit of each element.
 *
 * The mask is an ordinary local constant rather than a function-local static:
 * a static with a non-constant initializer needs a thread-safe initialization
 * check on every call, whereas the compiler can materialize this constant directly.
 */
static inline fvec8 abs(fvec8 v) {
    const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
    return fvec8(_mm256_and_ps(v.val, mask));
}
/** Element-wise square root. */
static inline fvec8 sqrt(fvec8 v) {
    const __m256 roots = _mm256_sqrt_ps(v.val);
    return fvec8(roots);
}
/**
 * Dot product of two eight element vectors.
 *
 * _mm256_dp_ps works independently on each 128 bit half, so the partial sum
 * of each half is read from its first element and the two are added.
 */
static inline float dot8(fvec8 v1, fvec8 v2) {
    const fvec8 partial = _mm256_dp_ps(v1, v2, 0xF1);
    const float lowerSum = _mm_cvtss_f32(partial.lowerVec());
    const float upperSum = _mm_cvtss_f32(partial.upperVec());
    return lowerSum + upperSum;
}
/**
 * Transpose eight four element vectors into four eight element vectors.
 *
 * in1-in4 and in5-in8 are each transposed as a 4x4 block.  Row k of the first
 * block fills the lower half of output k, and row k of the second block fills
 * its upper half.
 */
static inline void transpose(fvec4 in1, fvec4 in2, fvec4 in3, fvec4 in4, fvec4 in5, fvec4 in6, fvec4 in7, fvec4 in8,
                             fvec8& out1, fvec8& out2, fvec8& out3, fvec8& out4) {
    // The inputs are passed by value, so transposing them in place does not affect the caller.
    _MM_TRANSPOSE4_PS(in1, in2, in3, in4);
    _MM_TRANSPOSE4_PS(in5, in6, in7, in8);

    // Widen the lower half to 256 bits, then insert the matching upper half.
    out1 = _mm256_insertf128_ps(_mm256_castps128_ps256(in1), in5, 1);
    out2 = _mm256_insertf128_ps(_mm256_castps128_ps256(in2), in6, 1);
    out3 = _mm256_insertf128_ps(_mm256_castps128_ps256(in3), in7, 1);
    out4 = _mm256_insertf128_ps(_mm256_castps128_ps256(in4), in8, 1);
}
/**
 * Transpose four eight element vectors into eight four element vectors.
 *
 * The lower halves of the inputs are transposed as a 4x4 block into out1-out4,
 * and the upper halves are transposed as a 4x4 block into out5-out8.
 */
static inline void transpose(fvec8 in1, fvec8 in2, fvec8 in3, fvec8 in4,
                             fvec4& out1, fvec4& out2, fvec4& out3, fvec4& out4,
                             fvec4& out5, fvec4& out6, fvec4& out7, fvec4& out8) {
    // Lower halves.
    fvec4 low1 = in1.lowerVec();
    fvec4 low2 = in2.lowerVec();
    fvec4 low3 = in3.lowerVec();
    fvec4 low4 = in4.lowerVec();
    _MM_TRANSPOSE4_PS(low1, low2, low3, low4);
    out1 = low1;
    out2 = low2;
    out3 = low3;
    out4 = low4;

    // Upper halves.
    fvec4 high1 = in1.upperVec();
    fvec4 high2 = in2.upperVec();
    fvec4 high3 = in3.upperVec();
    fvec4 high4 = in4.upperVec();
    _MM_TRANSPOSE4_PS(high1, high2, high3, high4);
    out5 = high1;
    out6 = high2;
    out7 = high3;
    out8 = high4;
}
// Functions that operate on ivec8s.
/**
 * Report whether any bit of the vector is set.
 *
 * _mm256_testz_si256(a, b) yields 1 exactly when (a AND b) is all zero, so
 * testing the value against itself yields 0 whenever some bit is set.
 */
static inline bool any(ivec8 v) {
    return _mm256_testz_si256(v.val, v.val) == 0;
}
// Mathematical operators involving a scalar and a vector.
/** scalar + vector: the scalar is broadcast to all eight elements first. */
static inline fvec8 operator+(float v1, fvec8 v2) {
    const fvec8 broadcast(v1);
    return broadcast + v2;
}
/** scalar - vector: the scalar is broadcast to all eight elements first. */
static inline fvec8 operator-(float v1, fvec8 v2) {
    const fvec8 broadcast(v1);
    return broadcast - v2;
}
/** scalar * vector: the scalar is broadcast to all eight elements first. */
static inline fvec8 operator*(float v1, fvec8 v2) {
    const fvec8 broadcast(v1);
    return broadcast * v2;
}
/** scalar / vector: the scalar is broadcast to all eight elements first. */
static inline fvec8 operator/(float v1, fvec8 v2) {
    const fvec8 broadcast(v1);
    return broadcast / v2;
}
// Operations for blending fvec8s based on an ivec8.
/**
 * Select elements from two vectors according to a mask.
 *
 * For each element, the result takes the value from v2 when the top bit of
 * the corresponding mask element is set, and from v1 otherwise.
 */
static inline fvec8 blend(fvec8 v1, fvec8 v2, ivec8 mask) {
    // Reinterpret the integer mask bits as floats; no numeric conversion happens.
    const __m256 selector = _mm256_castsi256_ps(mask.val);
    return fvec8(_mm256_blendv_ps(v1.val, v2.val, selector));
}
#endif
/*OPENMM_VECTORIZE8_H_*/
platforms/cpu/include/CpuKernels.h
View file @
caefd490
...
@@ -33,6 +33,7 @@
...
@@ -33,6 +33,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CpuGBSAOBCForce.h"
#include "CpuGBSAOBCForce.h"
#include "CpuLangevinDynamics.h"
#include "CpuNeighborList.h"
#include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "CpuNonbondedForce.h"
#include "CpuPlatform.h"
#include "CpuPlatform.h"
...
@@ -48,6 +49,7 @@ namespace OpenMM {
...
@@ -48,6 +49,7 @@ namespace OpenMM {
*/
*/
class
CpuCalcForcesAndEnergyKernel
:
public
CalcForcesAndEnergyKernel
{
class
CpuCalcForcesAndEnergyKernel
:
public
CalcForcesAndEnergyKernel
{
public:
public:
class
SumForceTask
;
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
);
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
);
/**
/**
* Initialize the kernel.
* Initialize the kernel.
...
@@ -88,9 +90,7 @@ private:
...
@@ -88,9 +90,7 @@ private:
*/
*/
class
CpuCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CpuCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CpuCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
);
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
)
{
}
~
CpuCalcNonbondedForceKernel
();
~
CpuCalcNonbondedForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
...
@@ -130,8 +130,8 @@ private:
...
@@ -130,8 +130,8 @@ private:
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
RealVec
>
lastPositions
;
std
::
vector
<
RealVec
>
lastPositions
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNeighborList
*
neighborList
;
CpuNonbondedForce
nonbonded
;
CpuNonbondedForce
*
nonbonded
;
Kernel
optimizedPme
;
Kernel
optimizedPme
;
};
};
...
@@ -173,6 +173,43 @@ private:
...
@@ -173,6 +173,43 @@ private:
CpuGBSAOBCForce
obc
;
CpuGBSAOBCForce
obc
;
};
};
/**
 * Kernel invoked by LangevinIntegrator to advance the simulation by one time step.
 */
class CpuIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
    /**
     * Create the kernel.  The dynamics object starts out as NULL.
     *
     * @param name      passed through to the IntegrateLangevinStepKernel base class
     * @param platform  passed through to the IntegrateLangevinStepKernel base class
     * @param data      platform data; only a reference to it is stored
     */
    CpuIntegrateLangevinStepKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data)
        : IntegrateLangevinStepKernel(name, platform), data(data), dynamics(NULL) {
    }
    ~CpuIntegrateLangevinStepKernel();
    /**
     * Prepare the kernel for use, recording the particle masses.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the LangevinIntegrator this kernel will be used for
     */
    void initialize(const System& system, const LangevinIntegrator& integrator);
    /**
     * Run the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the LangevinIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const LangevinIntegrator& integrator);
    /**
     * Calculate the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the LangevinIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator);
private:
    CpuPlatform::PlatformData& data;
    CpuLangevinDynamics* dynamics;
    std::vector<RealOpenMM> masses;
    // NOTE(review): presumably the integrator settings seen on the previous step - confirm against the implementation.
    double prevTemp;
    double prevFriction;
    double prevStepSize;
};
}
// namespace OpenMM
}
// namespace OpenMM
#endif
/*OPENMM_CPUKERNELS_H_*/
#endif
/*OPENMM_CPUKERNELS_H_*/
...
...
platforms/cpu/include/CpuLangevinDynamics.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __CPU_LANGEVIN_DYNAMICS_H__
#define __CPU_LANGEVIN_DYNAMICS_H__
#include "ReferenceStochasticDynamics.h"
#include "CpuRandom.h"
#include "openmm/internal/ThreadPool.h"
#include "sfmt/SFMT.h"
// ---------------------------------------------------------------------------------------
class
CpuLangevinDynamics
:
public
ReferenceStochasticDynamics
{
public:
class
Update1Task
;
class
Update2Task
;
/**
* Constructor.
*
* @param numberOfAtoms number of atoms
* @param deltaT delta t for dynamics
* @param tau viscosity
* @param temperature temperature
* @param threads thread pool for parallelizing computation
* @param random random number generator
*/
CpuLangevinDynamics
(
int
numberOfAtoms
,
RealOpenMM
deltaT
,
RealOpenMM
tau
,
RealOpenMM
temperature
,
OpenMM
::
ThreadPool
&
threads
,
OpenMM
::
CpuRandom
&
random
);
/**
* Destructor.
*/
~
CpuLangevinDynamics
();
/**
* First update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void
updatePart1
(
int
numberOfAtoms
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
velocities
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
std
::
vector
<
OpenMM
::
RealVec
>&
xPrime
);
/**
* Second update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void
updatePart2
(
int
numberOfAtoms
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
velocities
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
std
::
vector
<
RealOpenMM
>&
inverseMasses
,
std
::
vector
<
OpenMM
::
RealVec
>&
xPrime
);
private:
void
threadUpdate1
(
int
threadIndex
);
void
threadUpdate2
(
int
threadIndex
);
OpenMM
::
ThreadPool
&
threads
;
OpenMM
::
CpuRandom
&
random
;
std
::
vector
<
OpenMM_SFMT
::
SFMT
>
threadRandom
;
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
OpenMM
::
RealVec
*
atomCoordinates
;
OpenMM
::
RealVec
*
velocities
;
OpenMM
::
RealVec
*
forces
;
RealOpenMM
*
inverseMasses
;
OpenMM
::
RealVec
*
xPrime
;
};
// ---------------------------------------------------------------------------------------
#endif // __CPU_LANGEVIN_DYNAMICS_H__
platforms/cpu/include/CpuNeighborList.h
View file @
caefd490
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
...
@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
public:
public:
class
ThreadTask
;
class
ThreadTask
;
class
Voxels
;
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
(
int
blockSize
);
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
void
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
int
getNumBlocks
()
const
;
...
@@ -59,6 +58,7 @@ public:
...
@@ -59,6 +58,7 @@ public:
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
runThread
(
int
index
);
void
runThread
(
int
index
);
private:
private:
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
caefd490
...
@@ -49,6 +49,12 @@ class CpuNonbondedForce {
...
@@ -49,6 +49,12 @@ class CpuNonbondedForce {
CpuNonbondedForce
();
CpuNonbondedForce
();
/**
* Virtual destructor.
*/
virtual
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
...
@@ -151,7 +157,7 @@ class CpuNonbondedForce {
...
@@ -151,7 +157,7 @@ class CpuNonbondedForce {
*/
*/
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
void
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
);
pr
ivate
:
pr
otected
:
bool
cutoff
;
bool
cutoff
;
bool
useSwitch
;
bool
useSwitch
;
bool
periodic
;
bool
periodic
;
...
@@ -204,7 +210,7 @@ private:
...
@@ -204,7 +210,7 @@ private:
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -216,7 +222,7 @@ private:
...
@@ -216,7 +222,7 @@ private:
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
virtual
void
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
=
0
;
/**
/**
* Compute the displacement and squared distance between two points, optionally using
* Compute the displacement and squared distance between two points, optionally using
...
@@ -224,26 +230,15 @@ private:
...
@@ -224,26 +230,15 @@ private:
*/
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
fvec4
erfcApprox
(
fvec4
x
);
/**
/**
* Create a lookup table for the scale factor used with Ewald and PME.
* Create a lookup table for the scale factor used with Ewald and PME.
*/
*/
void
tabulateEwaldScaleFactor
();
void
tabulateEwaldScaleFactor
();
/**
/**
*
Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*
Compute a fast approximation to erfc(x).
*/
*/
fvec4
ewaldScaleFunction
(
fvec4
x
);
static
float
erfcApprox
(
float
x
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#include "CpuNonbondedForce.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
/**
 * CpuNonbondedForce subclass whose block interaction routines are declared
 * in terms of four element vectors (fvec4).
 */
class CpuNonbondedForceVec4 : public CpuNonbondedForce {
public:
    /**
     * Constructor.
     */
    CpuNonbondedForceVec4();

protected:
    /**
     * Calculate all the interactions for one atom block.
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      NOTE(review): presumably the periodic box dimensions - confirm
     * @param invBoxSize   NOTE(review): presumably the reciprocal of boxSize - confirm
     */
    void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy,
                           const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Calculate all the interactions for one atom block (Ewald variant, going
     * by the name; the original comment does not distinguish it from calculateBlockIxn).
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      NOTE(review): presumably the periodic box dimensions - confirm
     * @param invBoxSize   NOTE(review): presumably the reciprocal of boxSize - confirm
     */
    void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy,
                                const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Compute the displacement and squared distance between a collection of points,
     * optionally using periodic boundary conditions.
     */
    void getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z,
                   fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2,
                   bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;

    /**
     * Fast approximation to erfc(x), evaluated on four values at once.
     */
    static fvec4 erfcApprox(fvec4 x);

    /**
     * Scale factor used with Ewald and PME:
     * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
     */
    fvec4 ewaldScaleFunction(fvec4 x);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
platforms/cpu/include/CpuNonbondedForceVec8.h
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#ifdef __AVX__
#include "CpuNonbondedForce.h"
#include "openmm/internal/vectorize8.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
/**
 * CpuNonbondedForce subclass whose helper routines are declared in terms of
 * eight element vectors (fvec8).
 */
class CpuNonbondedForceVec8 : public CpuNonbondedForce {
public:
    /**
     * Constructor.
     */
    CpuNonbondedForceVec8();

protected:
    /**
     * Calculate all the interactions for one atom block.
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      NOTE(review): presumably the periodic box dimensions - confirm
     * @param invBoxSize   NOTE(review): presumably the reciprocal of boxSize - confirm
     */
    void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy,
                           const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Calculate all the interactions for one atom block (Ewald variant, going
     * by the name; the original comment does not distinguish it from calculateBlockIxn).
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      NOTE(review): presumably the periodic box dimensions - confirm
     * @param invBoxSize   NOTE(review): presumably the reciprocal of boxSize - confirm
     */
    void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy,
                                const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Compute the displacement and squared distance between a collection of points,
     * optionally using periodic boundary conditions.
     */
    void getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z,
                   fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2,
                   bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;

    /**
     * Fast approximation to erfc(x), evaluated on eight values at once.
     */
    static fvec8 erfcApprox(fvec8 x);

    /**
     * Scale factor used with Ewald and PME:
     * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
     */
    fvec8 ewaldScaleFunction(fvec8 x);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // __AVX__
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
platforms/cpu/include/CpuPlatform.h
View file @
caefd490
...
@@ -33,6 +33,7 @@
...
@@ -33,6 +33,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "AlignedArray.h"
#include "AlignedArray.h"
#include "CpuRandom.h"
#include "ReferencePlatform.h"
#include "ReferencePlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/ThreadPool.h"
...
@@ -74,6 +75,7 @@ public:
...
@@ -74,6 +75,7 @@ public:
std
::
vector
<
AlignedArray
<
float
>
>
threadForce
;
std
::
vector
<
AlignedArray
<
float
>
>
threadForce
;
ThreadPool
threads
;
ThreadPool
threads
;
bool
isPeriodic
;
bool
isPeriodic
;
CpuRandom
random
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuRandom.h
0 → 100644
View file @
caefd490
#ifndef OPENMM_CPURANDOM_H_
#define OPENMM_CPURANDOM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "sfmt/SFMT.h"
#include <vector>
namespace
OpenMM
{
/**
 * A random number generator meant to be used from several threads at once.
 * Every query identifies the calling thread by index.
 *
 * NOTE(review): the member descriptions below are inferred from names and types;
 * the implementation is not part of this header -- confirm against CpuRandom.cpp.
 */
class OPENMM_EXPORT CpuRandom {
public:
    CpuRandom();
    ~CpuRandom();

    /**
     * Prepare the generator for use.
     *
     * @param seed        the random number seed
     * @param numThreads  the number of threads that will request random numbers
     */
    void initialize(int seed, int numThreads);

    /**
     * Get a random number for the given thread (presumably drawn from a
     * standard normal distribution -- TODO confirm).
     *
     * @param threadIndex  index of the calling thread
     */
    float getGaussianRandom(int threadIndex);

    /**
     * Get a random number for the given thread (presumably uniformly
     * distributed -- TODO confirm the range).
     *
     * @param threadIndex  index of the calling thread
     */
    float getUniformRandom(int threadIndex);

private:
    bool hasInitialized;
    int randomSeed;
    // Presumably one SFMT generator per thread -- TODO confirm ownership in the implementation.
    std::vector<OpenMM_SFMT::SFMT*> threadRandom;
    // Presumably a cached Gaussian value per thread and a flag saying whether it may be used.
    std::vector<float> nextGaussian;
    std::vector<int> nextGaussianIsValid;
};
}
// namespace OpenMM
#endif
/*OPENMM_CPURANDOM_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
caefd490
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
@@ -7,6 +11,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...
@@ -7,6 +11,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1
-DOPENMM_CPU_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cpu/src/CpuKernelFactory.cpp
View file @
caefd490
...
@@ -45,5 +45,7 @@ KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform&
...
@@ -45,5 +45,7 @@ KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform&
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
,
data
);
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
CpuCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
return
new
CpuCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
return
new
CpuIntegrateLangevinStepKernel
(
name
,
platform
,
data
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
}
platforms/cpu/src/CpuKernels.cpp
View file @
caefd490
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include "CpuKernels.h"
#include "CpuKernels.h"
#include "ReferenceBondForce.h"
#include "ReferenceBondForce.h"
#include "ReferenceConstraints.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernels.h"
#include "ReferenceKernels.h"
#include "ReferenceLJCoulomb14.h"
#include "ReferenceLJCoulomb14.h"
...
@@ -64,6 +65,71 @@ static RealVec& extractBoxSize(ContextImpl& context) {
...
@@ -64,6 +65,71 @@ static RealVec& extractBoxSize(ContextImpl& context) {
return
*
(
RealVec
*
)
data
->
periodicBoxSize
;
return
*
(
RealVec
*
)
data
->
periodicBoxSize
;
}
}
/**
 * Look up the ReferenceConstraints object referenced by the platform data of a context.
 *
 * @param context  the context whose platform data is inspected
 * @return a reference to the object the platform data's "constraints" field points to
 */
static ReferenceConstraints& extractConstraints(ContextImpl& context) {
    // The context's platform data is interpreted as the Reference platform's data structure.
    ReferencePlatform::PlatformData* platformData =
            reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
    ReferenceConstraints* constraints = (ReferenceConstraints*) platformData->constraints;
    return *constraints;
}
/**
 * Compute the kinetic energy of the system after moving every velocity forward in time by
 * timeShift (v + f*timeShift/m), which accounts for the half step offset of a leapfrog integrator.
 *
 * @param context    the context providing positions, velocities, forces and constraints
 * @param masses     the mass of each particle; particles whose mass is not positive keep their
 *                   velocity unshifted and do not contribute to the energy
 * @param timeShift  the amount of time by which the velocities are shifted
 * @return the kinetic energy computed from the shifted, constrained velocities
 */
static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& masses, double timeShift) {
    vector<RealVec>& positions = extractPositions(context);
    vector<RealVec>& velocities = extractVelocities(context);
    vector<RealVec>& forces = extractForces(context);
    const int particleCount = context.getSystem().getNumParticles();

    // Build the shifted velocities and the inverse masses in a single pass.
    vector<RealVec> shifted(particleCount);
    vector<double> invMass(particleCount);
    for (int p = 0; p < particleCount; p++) {
        const double mass = masses[p];
        if (mass > 0)
            shifted[p] = velocities[p]+forces[p]*(timeShift/mass);
        else
            shifted[p] = velocities[p];
        invMass[p] = (mass == 0 ? 0 : 1/mass);
    }

    // Remove the velocity components that violate the constraints.
    extractConstraints(context).applyToVelocities(positions, shifted, invMass, 1e-4);

    // Accumulate m*v^2 over all particles with positive mass; the factor 1/2 is applied at the end.
    double massTimesSpeedSquared = 0.0;
    for (int p = 0; p < particleCount; p++) {
        if (masses[p] > 0)
            massTimesSpeedSquared += masses[p]*(shifted[p].dot(shifted[p]));
    }
    return 0.5*massTimesSpeedSquared;
}
/**
 * Thread pool task that adds the per-thread force buffers into the context's force array.
 * Each thread handles a contiguous range of particles.
 */
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task {
public:
    SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) :
            numParticles(numParticles), forceData(forceData), data(data) {
    }
    /**
     * Sum the contributions of all threads for the particles assigned to this thread.
     *
     * @param threads      the pool executing the task
     * @param threadIndex  index of the executing thread; selects the particle range
     */
    void execute(ThreadPool& threads, int threadIndex) {
        const int threadCount = threads.getNumThreads();
        const int first = threadIndex*numParticles/threadCount;
        const int last = (threadIndex+1)*numParticles/threadCount;
        for (int particle = first; particle < last; particle++) {
            // Every per-thread buffer stores four floats per particle.
            fvec4 total(0.0f);
            for (int t = 0; t < threadCount; t++)
                total += fvec4(&data.threadForce[t][4*particle]);
            RealVec& force = forceData[particle];
            force[0] += total[0];
            force[1] += total[1];
            force[2] += total[2];
        }
    }
    int numParticles;
    vector<RealVec>& forceData;
    CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
// Create a Reference platform version of this kernel.
// Create a Reference platform version of this kernel.
...
@@ -111,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
...
@@ -111,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
// Sum the forces from all the threads.
// Sum the forces from all the threads.
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
SumForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
extractForces
(
context
),
data
);
int
numThreads
=
data
.
threads
.
getNumThreads
();
data
.
threads
.
execute
(
task
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
data
.
threads
.
waitForThreads
();
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
}
...
@@ -145,6 +203,22 @@ private:
...
@@ -145,6 +203,22 @@ private:
int
numParticles
;
int
numParticles
;
};
};
// Helpers used by the CpuCalcNonbondedForceKernel constructor to pick an implementation.
// They are only declared here; their definitions are not visible in this part of the file
// (presumably they live in the Vec4/Vec8 specific source files -- TODO confirm).
bool
isVec8Supported
();
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
/**
 * Create the kernel and choose the nonbonded implementation: when isVec8Supported() reports
 * support, the neighbor list uses blocks of 8 atoms together with the Vec8 implementation,
 * otherwise blocks of 4 atoms together with the Vec4 implementation.
 */
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
        CalcNonbondedForceKernel(name, platform), data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL),
        hasInitializedPme(false), neighborList(NULL), nonbonded(NULL) {
    const bool useVec8 = isVec8Supported();
    // The neighbor list block size must match the vector width of the chosen implementation.
    neighborList = new CpuNeighborList(useVec8 ? 8 : 4);
    nonbonded = (useVec8 ? createCpuNonbondedForceVec8() : createCpuNonbondedForceVec4());
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
if
(
bonded14ParamArray
!=
NULL
)
{
if
(
bonded14ParamArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
...
@@ -154,6 +228,10 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
...
@@ -154,6 +228,10 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
delete
bonded14IndexArray
;
delete
bonded14IndexArray
;
delete
bonded14ParamArray
;
delete
bonded14ParamArray
;
}
}
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
}
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
@@ -305,26 +383,26 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -305,26 +383,26 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
}
if
(
needRecompute
)
{
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
lastPositions
=
posData
;
lastPositions
=
posData
;
}
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
}
if
(
data
.
isPeriodic
)
{
if
(
data
.
isPeriodic
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
nonbonded
->
setPeriodic
(
floatBoxSize
);
}
}
if
(
ewald
)
if
(
ewald
)
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
->
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
if
(
pme
)
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
if
(
useSwitchingFunction
)
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
double
nonbondedEnergy
=
0
;
double
nonbondedEnergy
=
0
;
if
(
includeDirect
)
if
(
includeDirect
)
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
if
(
includeReciprocal
)
{
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
...
@@ -333,7 +411,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -333,7 +411,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
}
else
else
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
}
energy
+=
nonbondedEnergy
;
energy
+=
nonbondedEnergy
;
if
(
includeDirect
)
{
if
(
includeDirect
)
{
...
@@ -440,3 +518,45 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
...
@@ -440,3 +518,45 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
}
}
obc
.
setParticleParameters
(
particleParams
);
obc
.
setParticleParameters
(
particleParams
);
}
}
/**
 * Release the dynamics object created by execute(), if there is one.
 */
CpuIntegrateLangevinStepKernel::~CpuIntegrateLangevinStepKernel() {
    // Deleting a null pointer has no effect, so no explicit check is required.
    delete dynamics;
}
void
CpuIntegrateLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
)
{
int
numParticles
=
system
.
getNumParticles
();
masses
.
resize
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
masses
[
i
]
=
static_cast
<
RealOpenMM
>
(
system
.
getParticleMass
(
i
));
data
.
random
.
initialize
(
integrator
.
getRandomNumberSeed
(),
data
.
threads
.
getNumThreads
());
}
/**
 * Advance the simulation by one Langevin step.
 *
 * The CpuLangevinDynamics object is created lazily and recreated whenever the temperature,
 * friction or step size of the integrator differs from the values used to build it.
 *
 * @param context     the context whose positions, velocities and forces are used
 * @param integrator  the LangevinIntegrator supplying temperature, friction, step size and
 *                    constraint tolerance
 */
void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
    vector<RealVec>& posData = extractPositions(context);
    vector<RealVec>& velData = extractVelocities(context);
    vector<RealVec>& forceData = extractForces(context);
    if (dynamics == NULL || temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
        // Recreate the computation objects with the new parameters.

        // Clear the pointer right after deleting: if constructing the replacement throws,
        // the destructor must not delete the old object a second time.
        delete dynamics;
        dynamics = NULL;

        // A friction of zero is encoded as tau == 0 rather than an infinite time constant.
        RealOpenMM tau = (friction == 0.0 ? 0.0 : 1.0/friction);
        dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, tau, temperature, data.threads, data.random);
        dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context));
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
    }
    dynamics->update(context.getSystem(), posData, velData, forceData, masses, integrator.getConstraintTolerance());

    // Advance the simulation clock stored in the Reference platform data.
    ReferencePlatform::PlatformData* refData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
    refData->time += stepSize;
    refData->stepCount++;
}
/**
 * Compute the kinetic energy, with the velocities shifted forward by half a time step.
 *
 * @param context     the context to compute the energy for
 * @param integrator  the LangevinIntegrator providing the step size
 * @return the kinetic energy returned by computeShiftedKineticEnergy()
 */
double CpuIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator) {
    const double halfStep = 0.5*integrator.getStepSize();
    return computeShiftedKineticEnergy(context, masses, halfStep);
}
platforms/cpu/src/CpuLangevinDynamics.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuLangevinDynamics.h"
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Thread pool task that runs CpuLangevinDynamics::threadUpdate1() on every worker thread.
 */
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
    Update1Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }
    /**
     * Forward the call to the owning dynamics object.
     *
     * @param threads      the pool executing the task (not used)
     * @param threadIndex  index of the executing thread
     */
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadUpdate1(threadIndex);
    }
    CpuLangevinDynamics& owner;
};
/**
 * Thread pool task that runs CpuLangevinDynamics::threadUpdate2() on every worker thread.
 */
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
    Update2Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }
    /**
     * Forward the call to the owning dynamics object.
     *
     * @param threads      the pool executing the task (not used)
     * @param threadIndex  index of the executing thread
     */
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadUpdate2(threadIndex);
    }
    CpuLangevinDynamics& owner;
};
/**
 * Create the dynamics object.
 *
 * @param numberOfAtoms  forwarded to ReferenceStochasticDynamics
 * @param deltaT         forwarded to ReferenceStochasticDynamics
 * @param tau            forwarded to ReferenceStochasticDynamics
 * @param temperature    forwarded to ReferenceStochasticDynamics
 * @param threads        the thread pool used to run the updates; stored in the member "threads"
 * @param random         the random number generator used for the noise; stored in the member "random"
 */
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature,
                                         ThreadPool& threads, CpuRandom& random) :
        ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature),
        threads(threads),
        random(random) {
}
/**
 * Destructor; the body is empty, no cleanup is performed here.
 */
CpuLangevinDynamics::~CpuLangevinDynamics() {
}
void
CpuLangevinDynamics
::
updatePart1
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update1Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
updatePart2
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update2Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
threadUpdate1
(
int
threadIndex
)
{
const
RealOpenMM
tau
=
getTau
();
const
RealOpenMM
vscale
=
EXP
(
-
getDeltaT
()
/
tau
);
const
RealOpenMM
fscale
=
(
1
-
vscale
)
*
tau
;
const
RealOpenMM
kT
=
BOLTZ
*
getTemperature
();
const
RealOpenMM
noisescale
=
SQRT
(
2
*
kT
/
tau
)
*
SQRT
(
0.5
*
(
1
-
vscale
*
vscale
)
*
tau
);
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
RealVec
noise
(
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
));
velocities
[
i
]
=
velocities
[
i
]
*
vscale
+
forces
[
i
]
*
(
fscale
*
inverseMasses
[
i
])
+
noise
*
(
noisescale
*
sqrtInvMass
);
}
}
}
void
CpuLangevinDynamics
::
threadUpdate2
(
int
threadIndex
)
{
const
RealOpenMM
dt
=
getDeltaT
();
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
xPrime
[
i
]
=
atomCoordinates
[
i
]
+
velocities
[
i
]
*
dt
;
}
}
}
platforms/cpu/src/CpuNeighborList.cpp
View file @
caefd490
...
@@ -43,8 +43,6 @@ using namespace std;
...
@@ -43,8 +43,6 @@ using namespace std;
namespace
OpenMM
{
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
class
VoxelIndex
{
{
public:
public:
...
@@ -62,8 +60,8 @@ public:
...
@@ -62,8 +60,8 @@ public:
*/
*/
class
CpuNeighborList
::
Voxels
{
class
CpuNeighborList
::
Voxels
{
public:
public:
Voxels
(
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
Voxels
(
int
blockSize
,
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
blockSize
(
blockSize
),
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
...
@@ -156,7 +154,7 @@ public:
...
@@ -156,7 +154,7 @@ public:
return
VoxelIndex
(
x
,
y
);
return
VoxelIndex
(
x
,
y
);
}
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
&
blockAtoms
,
const
float
*
atomLocations
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
@@ -175,9 +173,6 @@ public:
...
@@ -175,9 +173,6 @@ public:
float
centerPos
[
4
];
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
VoxelIndex
atomVoxelIndex
[
BlockSize
];
for
(
int
i
=
0
;
i
<
(
int
)
blockAtoms
.
size
();
i
++
)
atomVoxelIndex
[
i
]
=
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
i
]]);
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
...
@@ -193,7 +188,7 @@ public:
...
@@ -193,7 +188,7 @@ public:
endx
=
min
(
endx
,
nx
-
1
);
endx
=
min
(
endx
,
nx
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
}
}
int
lastSortedIndex
=
B
lockSize
*
(
blockIndex
+
1
);
int
lastSortedIndex
=
b
lockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
);
VoxelIndex
voxelIndex
(
0
,
0
);
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
voxelIndex
.
x
=
x
;
voxelIndex
.
x
=
x
;
...
@@ -300,10 +295,12 @@ public:
...
@@ -300,10 +295,12 @@ public:
// Add this atom to the list of neighbors.
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
B
lockSize
*
blockIndex
)
if
(
sortedIndex
<
b
lockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
0
);
else
else
{
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
int
mask
=
(
1
<<
blockSize
)
-
1
;
exclusions
.
push_back
(
mask
&
(
mask
<<
(
sortedIndex
-
blockSize
*
blockIndex
)));
}
}
}
}
}
}
}
...
@@ -311,6 +308,7 @@ public:
...
@@ -311,6 +308,7 @@ public:
}
}
private:
private:
int
blockSize
;
float
voxelSizeX
,
voxelSizeY
;
float
voxelSizeX
,
voxelSizeY
;
float
minx
,
maxx
,
miny
,
maxy
;
float
minx
,
maxx
,
miny
,
maxy
;
int
nx
,
ny
;
int
nx
,
ny
;
...
@@ -329,12 +327,12 @@ public:
...
@@ -329,12 +327,12 @@ public:
CpuNeighborList
&
owner
;
CpuNeighborList
&
owner
;
};
};
CpuNeighborList
::
CpuNeighborList
()
{
CpuNeighborList
::
CpuNeighborList
(
int
blockSize
)
:
blockSize
(
blockSize
)
{
}
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
B
lockSize
-
1
)
/
B
lockSize
;
int
numBlocks
=
(
numAtoms
+
b
lockSize
-
1
)
/
b
lockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedAtoms
.
resize
(
numAtoms
);
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeX
=
0.6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeY
=
0.6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
Voxels
voxels
(
blockSize
,
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
sortedAtoms
[
i
]
=
atomIndex
;
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Add padding atoms to fill up the last block.
// Add padding atoms to fill up the last block.
int
numPadding
=
numBlocks
*
B
lockSize
-
numAtoms
;
int
numPadding
=
numBlocks
*
b
lockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
if
(
numPadding
>
0
)
{
char
mask
=
(
0xF
0
>>
numPadding
)
&
0xF
;
char
mask
=
(
(
0xF
FFF
-
(
1
<<
blockSize
)
+
1
)
>>
numPadding
);
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
}
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
B
lockSize
;
return
sortedAtoms
.
size
()
/
b
lockSize
;
}
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
int
>
blockAtoms
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
// Find the atoms in this block and compute their bounding box.
int
firstIndex
=
B
lockSize
*
i
;
int
firstIndex
=
b
lockSize
*
i
;
int
atomsInBlock
=
min
(
B
lockSize
,
numAtoms
-
firstIndex
);
int
atomsInBlock
=
min
(
b
lockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
atomVoxelIndex
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
minPos
=
min
(
minPos
,
pos
);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
// Record the exclusions for this block.
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
caefd490
...
@@ -29,7 +29,6 @@
...
@@ -29,7 +29,6 @@
#include "CpuNonbondedForce.h"
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "ReferencePME.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include "gmx_atomic.h"
// In case we're using some primitive version of Visual Studio this will
// In case we're using some primitive version of Visual Studio this will
...
@@ -61,6 +60,9 @@ public:
...
@@ -61,6 +60,9 @@ public:
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
)
{
}
}
/**
 * Destructor. The body is empty: nothing is released explicitly here.
 */
CpuNonbondedForce::~CpuNonbondedForce() {
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
...
@@ -356,7 +358,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -356,7 +358,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float
inverseR
=
1
/
r
;
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)
[
0
]
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
fvec4
result
=
deltaR
*
dEdR
;
...
@@ -446,228 +448,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
...
@@ -446,228 +448,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
}
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
false
;
if
(
periodic
)
{
for
(
int
i
=
0
;
i
<
4
&&
!
needPeriodic
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
if
(
blockAtomPosq
[
i
][
j
]
-
cutoffDistance
<
0.0
||
blockAtomPosq
[
i
][
j
]
+
cutoffDistance
>
boxSize
[
j
])
{
needPeriodic
=
true
;
break
;
}
}
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
1.0
f
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
/**
 * Compute the direct-space Ewald interactions between one block of four atoms
 * and every atom in that block's neighbor list.
 *
 * @param blockIndex   index of the block in the neighbor list
 * @param forces       force array, four floats per atom; updated in place
 * @param totalEnergy  if not NULL, the interaction energy is added to it
 * @param boxSize      periodic box dimensions
 * @param invBoxSize   passed through to getDeltaR() (presumably 1/boxSize)
 */
void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Load the positions and parameters of the atoms in the block.

    int blockAtom[4];
    fvec4 blockAtomPosq[4];
    fvec4 blockAtomForce[4];
    for (int i = 0; i < 4; i++) {
        blockAtom[i] = neighborList->getSortedAtoms()[4*blockIndex+i];
        blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
        blockAtomForce[i] = fvec4(0.0f);
    }

    // Rearrange into one vector per coordinate so that all four block atoms
    // can be processed against a single neighbor at once.

    fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
    fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
    fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
    fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
    fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
    fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);

    // Periodic wrapping is only requested when some block atom lies within the
    // cutoff distance of a box face.  Unlike calculateBlockIxn(), this check is
    // not guarded by the "periodic" flag.

    bool needPeriodic = false;
    for (int i = 0; i < 4 && !needPeriodic; i++)
        for (int j = 0; j < 3; j++)
            if (blockAtomPosq[i][j]-cutoffDistance < 0.0 || blockAtomPosq[i][j]+cutoffDistance > boxSize[j]) {
                needPeriodic = true;
                break;
            }

    // Only used inside the useSwitch branch below.
    const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);

    // Loop over neighbors for this block.

    const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
    const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
    for (int i = 0; i < (int) neighbors.size(); i++) {
        // Load the next neighbor.

        int atom = neighbors[i];
        fvec4 atomPosq(posq+4*atom);

        // Compute the distances to the block atoms.

        fvec4 dx, dy, dz, r2;
        getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);

        // Build the per-lane inclusion mask.  Bit k of the exclusion flag being
        // set removes block atom k; lanes beyond the cutoff are removed too.

        ivec4 include;
        char excl = exclusions[i];
        if (excl == 0)
            include = -1;
        else
            include = ivec4(excl&1 ? 0 : -1, excl&2 ? 0 : -1, excl&4 ? 0 : -1, excl&8 ? 0 : -1);
        include = include & (r2 < cutoffDistance*cutoffDistance);
        if (!any(include))
            continue; // No interactions to compute.

        // Compute the interactions.

        fvec4 r = sqrt(r2);
        fvec4 inverseR = fvec4(1.0f)/r;
        fvec4 energy, dEdR;
        float atomEpsilon = atomParameters[atom].second;
        if (atomEpsilon != 0.0f) {
            // Lennard-Jones term built from the summed sigma parameters and the
            // product of the epsilon parameters.
            fvec4 sig = blockAtomSigma+atomParameters[atom].first;
            fvec4 sig2 = inverseR*sig;
            sig2 *= sig2;
            fvec4 sig6 = sig2*sig2*sig2;
            fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
            dEdR = epsSig6*(12.0f*sig6 - 6.0f);
            energy = epsSig6*(sig6-1.0f);
            if (useSwitch) {
                // t is 0 for r <= switchingDistance (the comparison mask zeroes it).
                fvec4 t = (r>switchingDistance) & ((r-switchingDistance)*invSwitchingInterval);
                fvec4 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
                fvec4 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
                dEdR = switchValue*dEdR - energy*switchDeriv*r;
                energy *= switchValue;
            }
        }
        else {
            energy = 0.0f;
            dEdR = 0.0f;
        }

        // Coulomb term, scaled by the tabulated Ewald factor.

        fvec4 chargeProd = blockAtomCharge*posq[4*atom+3];
        dEdR += chargeProd*inverseR*ewaldScaleFunction(r);
        dEdR *= inverseR*inverseR;

        // Accumulate energies.

        if (totalEnergy) {
            energy += chargeProd*inverseR*erfcApprox(alphaEwald*r);
            energy = blend(0.0f, energy, include);
            *totalEnergy += dot4(energy, 1.0f);
        }

        // Accumulate forces.  Excluded lanes are replaced by zero first.

        dEdR = blend(0.0f, dEdR, include);
        fvec4 result[4] = {dx*dEdR, dy*dEdR, dz*dEdR, 0.0f};
        transpose(result[0], result[1], result[2], result[3]);
        fvec4 atomForce(forces+4*atom);
        for (int j = 0; j < 4; j++) {
            blockAtomForce[j] += result[j];
            atomForce -= result[j];
        }
        atomForce.store(forces+4*atom);
    }

    // Record the forces on the block atoms.

    for (int j = 0; j < 4; j++)
        (fvec4(forces+4*blockAtom[j])+blockAtomForce[j]).store(forces+4*blockAtom[j]);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
if
(
periodic
)
{
...
@@ -677,41 +457,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
...
@@ -677,41 +457,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
float
CpuNonbondedForce
::
erfcApprox
(
float
x
)
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForce
::
erfcApprox
(
fvec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
// error of 3e-7.
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
return
1.0
f
/
(
t
*
t
);
}
}
/**
 * Look up the Ewald scale factor for four distances at once by linear
 * interpolation in ewaldScaleTable.
 *
 * @param x  the four distances
 * @return the interpolated scale factor for each distance
 */
fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
    // Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)

    fvec4 x1 = x*ewaldDXInv;
    ivec4 index = min(floor(x1), NUM_TABLE_POINTS);
    fvec4 coeff2 = x1-index;
    fvec4 coeff1 = 1.0f-coeff2;

    // One load per lane starting at that lane's table index.  After the
    // transpose, t1 holds the entry at each index and t2 the entry that
    // follows it; t3 and t4 are not used.
    // NOTE(review): assumes the table has readable padding after entry
    // NUM_TABLE_POINTS - confirm where ewaldScaleTable is allocated.

    fvec4 t1(&ewaldScaleTable[index[0]]);
    fvec4 t2(&ewaldScaleTable[index[1]]);
    fvec4 t3(&ewaldScaleTable[index[2]]);
    fvec4 t4(&ewaldScaleTable[index[3]]);
    transpose(t1, t2, t3, t4);
    return coeff1*t1 + coeff2*t2;
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
using
namespace
std
;
using
namespace
OpenMM
;
/**
 * Factory method to create a CpuNonbondedForceVec4.
 *
 * @return a newly allocated CpuNonbondedForceVec4, returned through a pointer
 *         to its CpuNonbondedForce base; nothing here frees it
 */
CpuNonbondedForce* createCpuNonbondedForceVec4() {
    return new CpuNonbondedForceVec4();
}
/**---------------------------------------------------------------------------------------

   CpuNonbondedForceVec4 constructor.  The body is empty.

   --------------------------------------------------------------------------------------- */

CpuNonbondedForceVec4::CpuNonbondedForceVec4() {
}
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
/**
 * Compute the direct-space Ewald interactions between one block of four atoms
 * and every atom in that block's neighbor list, using 4-wide vectors.
 *
 * @param blockIndex   index of the block in the neighbor list
 * @param forces       force array, four floats per atom; updated in place
 * @param totalEnergy  if not NULL, the interaction energy is added to it
 * @param boxSize      periodic box dimensions
 * @param invBoxSize   passed through to getDeltaR() (presumably 1/boxSize)
 */
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Load the positions and parameters of the atoms in the block.

    int blockAtom[4];
    fvec4 blockAtomPosq[4];
    fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
    for (int i = 0; i < 4; i++) {
        blockAtom[i] = neighborList->getSortedAtoms()[4*blockIndex+i];
        blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
    }

    // Rearrange into one vector per coordinate so that all four block atoms
    // can be processed against a single neighbor at once.

    fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
    fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
    fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
    fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
    fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
    fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);

    // Periodic wrapping is only requested when some block atom lies within the
    // cutoff distance of a box face.

    bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) ||
            any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));

    // Only used inside the useSwitch branch below.
    const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);

    // dot4(v, one) sums the four lanes of v; the constant does not change
    // between neighbors, so it is built once.
    const fvec4 one(1.0f);

    // Loop over neighbors for this block.

    const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
    const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
    for (int i = 0; i < (int) neighbors.size(); i++) {
        // Load the next neighbor.

        int atom = neighbors[i];

        // Compute the distances to the block atoms.

        fvec4 dx, dy, dz, r2;
        getDeltaR(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);

        // Build the per-lane inclusion mask.  Bit k of the exclusion flag being
        // set removes block atom k; lanes beyond the cutoff are removed too.

        ivec4 include;
        char excl = exclusions[i];
        if (excl == 0)
            include = -1;
        else
            include = ivec4(excl&1 ? 0 : -1, excl&2 ? 0 : -1, excl&4 ? 0 : -1, excl&8 ? 0 : -1);
        include = include & (r2 < cutoffDistance*cutoffDistance);
        if (!any(include))
            continue; // No interactions to compute.

        // Compute the interactions.

        fvec4 r = sqrt(r2);
        fvec4 inverseR = fvec4(1.0f)/r;
        fvec4 energy, dEdR;
        float atomEpsilon = atomParameters[atom].second;
        if (atomEpsilon != 0.0f) {
            // Lennard-Jones term built from the summed sigma parameters and the
            // product of the epsilon parameters.
            fvec4 sig = blockAtomSigma+atomParameters[atom].first;
            fvec4 sig2 = inverseR*sig;
            sig2 *= sig2;
            fvec4 sig6 = sig2*sig2*sig2;
            fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
            dEdR = epsSig6*(12.0f*sig6 - 6.0f);
            energy = epsSig6*(sig6-1.0f);
            if (useSwitch) {
                // t is 0 for r <= switchingDistance (the comparison mask zeroes it).
                fvec4 t = (r>switchingDistance) & ((r-switchingDistance)*invSwitchingInterval);
                fvec4 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
                fvec4 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
                dEdR = switchValue*dEdR - energy*switchDeriv*r;
                energy *= switchValue;
            }
        }
        else {
            energy = 0.0f;
            dEdR = 0.0f;
        }

        // Coulomb term, scaled by the tabulated Ewald factor.

        fvec4 chargeProd = blockAtomCharge*posq[4*atom+3];
        dEdR += chargeProd*inverseR*ewaldScaleFunction(r);
        dEdR *= inverseR*inverseR;

        // Accumulate energies.

        if (totalEnergy) {
            energy += chargeProd*inverseR*erfcApprox(alphaEwald*r);
            energy = blend(0.0f, energy, include);
            *totalEnergy += dot4(energy, one);
        }

        // Accumulate forces.  Excluded lanes are replaced by zero first.

        dEdR = blend(0.0f, dEdR, include);
        fvec4 fx = dx*dEdR;
        fvec4 fy = dy*dEdR;
        fvec4 fz = dz*dEdR;
        blockAtomForceX += fx;
        blockAtomForceY += fy;
        blockAtomForceZ += fz;

        // The neighbor receives the opposite of the summed lane forces.
        float* atomForce = forces+4*atom;
        atomForce[0] -= dot4(fx, one);
        atomForce[1] -= dot4(fy, one);
        atomForce[2] -= dot4(fz, one);
    }

    // Record the forces on the block atoms.  The transpose turns the
    // per-coordinate accumulators back into one (x, y, z, 0) vector per atom.

    fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
    transpose(f[0], f[1], f[2], f[3]);
    for (int j = 0; j < 4; j++)
        (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
/**
 * Compute the displacement from one atom to each of four other atoms, and the
 * squared length of each displacement.
 *
 * @param posI        x, y, z of the single atom (the first three floats are read)
 * @param x           x coordinates of the four atoms
 * @param y           y coordinates of the four atoms
 * @param z           z coordinates of the four atoms
 * @param dx          on exit, x components of the four displacements
 * @param dy          on exit, y components of the four displacements
 * @param dz          on exit, z components of the four displacements
 * @param r2          on exit, the four squared distances
 * @param periodic    whether to wrap each component by a whole number of box lengths
 * @param boxSize     periodic box dimensions
 * @param invBoxSize  multiplier applied before rounding (presumably 1/boxSize)
 */
void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
    dx = x-posI[0];
    dy = y-posI[1];
    dz = z-posI[2];
    if (periodic) {
        // Subtract the nearest whole number of box lengths along each axis.
        dx -= round(dx*invBoxSize[0])*boxSize[0];
        dy -= round(dy*invBoxSize[1])*boxSize[1];
        dz -= round(dz*invBoxSize[2])*boxSize[2];
    }
    r2 = dx*dx + dy*dy + dz*dz;
}
/**
 * Approximate erfc(x) for four values at once.
 *
 * @param x  the four arguments
 * @return the approximation 1/t^16, where t is a degree-6 polynomial in x
 */
fvec4 CpuNonbondedForceVec4::erfcApprox(fvec4 x) {
    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
    // error of 3e-7.

    fvec4 t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x;

    // Three squarings give t^8; the final t*t makes the divisor t^16.
    t *= t;
    t *= t;
    t *= t;
    return 1.0f/(t*t);
}
/**
 * Look up the Ewald scale factor for four distances at once by linear
 * interpolation in ewaldScaleTable.
 *
 * @param x  the four distances
 * @return the interpolated scale factor for each distance
 */
fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(fvec4 x) {
    // Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)

    fvec4 x1 = x*ewaldDXInv;
    ivec4 index = min(floor(x1), NUM_TABLE_POINTS);
    fvec4 coeff2 = x1-index;
    fvec4 coeff1 = 1.0f-coeff2;

    // One load per lane starting at that lane's table index.  After the
    // transpose, t1 holds the entry at each index and t2 the entry that
    // follows it; t3 and t4 are not used.
    // NOTE(review): assumes the table has readable padding after entry
    // NUM_TABLE_POINTS - confirm where ewaldScaleTable is allocated.

    fvec4 t1(&ewaldScaleTable[index[0]]);
    fvec4 t2(&ewaldScaleTable[index[1]]);
    fvec4 t3(&ewaldScaleTable[index[2]]);
    fvec4 t4(&ewaldScaleTable[index[3]]);
    transpose(t1, t2, t3, t4);
    return coeff1*t1 + coeff2*t2;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
using
namespace
OpenMM
;
#ifndef __AVX__
/**
 * Version used when this file is compiled without __AVX__ defined:
 * 8 component vectors are reported as unsupported.
 */
bool isVec8Supported() {
    return false;
}
/**
 * Version used when this file is compiled without __AVX__ defined:
 * no Vec8 implementation exists, so this always throws.
 *
 * @throws OpenMMException always
 */
CpuNonbondedForce* createCpuNonbondedForceVec8() {
    throw OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
#else
/**
 * Check whether 8 component vectors are supported with the current CPU.
 *
 * @return true if CPUID leaf 1 reports both OSXSAVE (ECX bit 27) and AVX
 *         (ECX bit 28); false otherwise, including when leaf 1 is unavailable
 */
bool isVec8Supported() {
    int cpuInfo[4];
    cpuid(cpuInfo, 0);
    if (cpuInfo[0] < 1)
        return false; // CPUID leaf 1 is not available.
    cpuid(cpuInfo, 1);

    // The AVX bit alone only says the processor implements the instructions.
    // They still fault unless the operating system has enabled XSAVE, which
    // is what the OSXSAVE bit reports, so require both.
    // NOTE(review): a complete check also reads XCR0 with XGETBV to confirm
    // that YMM state is enabled; no helper for that is visible here.
    const int osxsaveBit = (int) 1 << 27;
    const int avxBit = (int) 1 << 28;
    return ((cpuInfo[2] & osxsaveBit) != 0 && (cpuInfo[2] & avxBit) != 0);
}
/**
 * Factory method to create a CpuNonbondedForceVec8.
 *
 * @return a newly allocated CpuNonbondedForceVec8, returned through a pointer
 *         to its CpuNonbondedForce base; nothing here frees it
 */
CpuNonbondedForce* createCpuNonbondedForceVec8() {
    return new CpuNonbondedForceVec8();
}
/**---------------------------------------------------------------------------------------

   CpuNonbondedForceVec8 constructor.  The body is empty.

   --------------------------------------------------------------------------------------- */

CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
}
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
/**
 * Approximate erfc(x) for each of the 8 elements of x.
 *
 * The result is 1/p(x)^16, where p is a sixth degree polynomial with constant
 * term 1.
 */
fvec8 CpuNonbondedForceVec8::erfcApprox(fvec8 x) {
    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
    // error of 3e-7.

    // Evaluate the polynomial by Horner's rule, innermost coefficient first.

    fvec8 poly = 0.0002765672f+0.0000430638f*x;
    poly = 0.0001520143f+poly*x;
    poly = 0.0092705272f+poly*x;
    poly = 0.0422820123f+poly*x;
    poly = 0.0705230784f+poly*x;
    fvec8 t = 1.0f+poly*x;

    // Square three times to get the 8th power; the final product gives the 16th.

    for (int pass = 0; pass < 3; pass++)
        t *= t;
    return 1.0f/(t*t);
}
/**
 * Look up the Ewald scale factor for each of the 8 elements of x by linear
 * interpolation in ewaldScaleTable.
 *
 * NOTE(review): four consecutive floats are loaded starting at each table index,
 * and the index can be as large as NUM_TABLE_POINTS, so this presumably relies on
 * the table having at least NUM_TABLE_POINTS+4 entries - confirm where it is built.
 */
fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(fvec8 x) {
    // Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)

    // Split the scaled argument into a table index (clamped to NUM_TABLE_POINTS)
    // and the two interpolation weights.

    fvec8 scaled = x*ewaldDXInv;
    ivec8 index = min(floor(scaled), NUM_TABLE_POINTS);
    fvec8 weightNext = scaled-index;
    fvec8 weightThis = 1.0f-weightNext;

    // Load four consecutive table values starting at each of the 8 indices.

    ivec4 lowerIndex = index.lowerVec();
    ivec4 upperIndex = index.upperVec();
    fvec4 row0(&ewaldScaleTable[lowerIndex[0]]);
    fvec4 row1(&ewaldScaleTable[lowerIndex[1]]);
    fvec4 row2(&ewaldScaleTable[lowerIndex[2]]);
    fvec4 row3(&ewaldScaleTable[lowerIndex[3]]);
    fvec4 row4(&ewaldScaleTable[upperIndex[0]]);
    fvec4 row5(&ewaldScaleTable[upperIndex[1]]);
    fvec4 row6(&ewaldScaleTable[upperIndex[2]]);
    fvec4 row7(&ewaldScaleTable[upperIndex[3]]);

    // Regroup the loaded values into four fvec8s.  Only the first two are used
    // for the interpolation; the other two are required outputs of transpose().

    fvec8 valueThis, valueNext, unused2, unused3;
    transpose(row0, row1, row2, row3, row4, row5, row6, row7, valueThis, valueNext, unused2, unused3);
    return weightThis*valueThis + weightNext*valueNext;
}
#endif
platforms/cpu/src/CpuPlatform.cpp
View file @
caefd490
...
@@ -53,6 +53,7 @@ CpuPlatform::CpuPlatform() {
...
@@ -53,6 +53,7 @@ CpuPlatform::CpuPlatform() {
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
}
}
double
CpuPlatform
::
getSpeed
()
const
{
double
CpuPlatform
::
getSpeed
()
const
{
...
...
platforms/cpu/src/CpuRandom.cpp
0 → 100644
View file @
caefd490
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuRandom.h"
#include "openmm/OpenMMException.h"
#include <cmath>
using
namespace
std
;
using
namespace
OpenMM
;
/**
 * Create a CpuRandom.  It is marked as not yet initialized; no generators are
 * created until initialize() is called.
 */
CpuRandom::CpuRandom() : hasInitialized(false) {
}
/**
 * Delete every per-thread generator that initialize() allocated.
 */
CpuRandom::~CpuRandom() {
    // Take the size as an int once, so the loop does not compare a signed index
    // against an unsigned size.
    const int numGenerators = (int) threadRandom.size();
    for (int i = 0; i < numGenerators; i++)
        delete threadRandom[i];
}
/**
 * Create the per-thread random number generators.
 *
 * Calling this again with the same seed is allowed.  If the new call asks for
 * more threads than already exist, generators are added for the extra threads
 * only; they get the same seeds they would have received had the larger count
 * been requested originally, and existing generators are left untouched.
 *
 * @param seed        the random number seed
 * @param numThreads  the number of threads that will request random numbers
 * @throws OpenMMException if this object was already initialized with a different seed
 */
void CpuRandom::initialize(int seed, int numThreads) {
    if (hasInitialized && seed != randomSeed)
        throw OpenMMException("Random number generator initialized twice with different seeds");
    const int numExisting = (hasInitialized ? (int) threadRandom.size() : 0);
    if (hasInitialized && numThreads <= numExisting)
        return; // Already initialized with the same seed and enough generators.
    randomSeed = seed;
    hasInitialized = true;
    threadRandom.resize(numThreads);
    nextGaussian.resize(numThreads);
    nextGaussianIsValid.resize(numThreads, false);

    // Use a quick and dirty RNG to pick seeds for the real random number generator.
    // The sequence is always replayed from the start so that thread i receives the
    // same seed no matter how many calls it took to reach the current thread count.

    unsigned int r = (unsigned int) seed;
    for (int i = 0; i < numThreads; i++) {
        r = (1664525*r + 1013904223) & 0xFFFFFFFF;
        if (i < numExisting)
            continue; // This thread's generator already exists; keep its state.
        threadRandom[i] = new OpenMM_SFMT::SFMT();
        init_gen_rand(r, *threadRandom[i]);
    }
}
float
CpuRandom
::
getGaussianRandom
(
int
threadIndex
)
{
if
(
nextGaussianIsValid
[
threadIndex
])
{
nextGaussianIsValid
[
threadIndex
]
=
false
;
return
nextGaussian
[
threadIndex
];
}
// Use the polar form of the Box-Muller transformation to generate two Gaussian random numbers.
float
x
,
y
,
r2
;
do
{
x
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
y
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
r2
=
x
*
x
+
y
*
y
;
}
while
(
r2
>=
1.0
f
||
r2
==
0.0
f
);
float
multiplier
=
sqrtf
((
-
2.0
f
*
logf
(
r2
))
/
r2
);
nextGaussian
[
threadIndex
]
=
y
*
multiplier
;
nextGaussianIsValid
[
threadIndex
]
=
true
;
return
x
*
multiplier
;
}
/**
 * Get a uniformly distributed random number for one thread.
 *
 * genrand_real2() presumably follows the Mersenne Twister naming convention and
 * returns a double in [0, 1) - confirm against SFMT.h.  Narrowing such a value to
 * float can round a result just below 1 up to exactly 1.0f, so the value is
 * clamped to keep the result strictly below 1.
 *
 * @param threadIndex  the index of the calling thread
 * @return a value that is always less than 1
 */
float CpuRandom::getUniformRandom(int threadIndex) {
    const float value = (float) genrand_real2(*threadRandom[threadIndex]);
    const float largestBelowOne = 0.99999994f; // 1 - 2^-24, the largest float less than 1.
    return (value < 1.0f ? value : largestBelowOne);
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment