Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
71d33617
Commit
71d33617
authored
Apr 24, 2015
by
John Chodera (MSKCC)
Browse files
Merge remote-tracking branch 'upstream/master'
parents
eb232608
9da36463
Changes
127
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
599 additions
and
328 deletions
+599
-328
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+12
-0
openmmapi/src/CMAPTorsionForce.cpp
openmmapi/src/CMAPTorsionForce.cpp
+4
-1
openmmapi/src/CMAPTorsionForceImpl.cpp
openmmapi/src/CMAPTorsionForceImpl.cpp
+6
-2
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-1
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+6
-6
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+6
-6
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+71
-41
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+137
-43
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+139
-45
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+12
-3
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+8
-1
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+40
-142
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+64
-24
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+5
-0
platforms/cuda/src/kernels/customIntegratorGlobal.cu
platforms/cuda/src/kernels/customIntegratorGlobal.cu
+1
-1
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+1
-1
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
+58
-1
platforms/opencl/include/OpenCLFFT3D.h
platforms/opencl/include/OpenCLFFT3D.h
+14
-6
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+12
-3
No files found.
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
71d33617
...
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec4
x2
=
v
*
0.5
f
;
y
*=
fvec4
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
...
...
openmmapi/src/CMAPTorsionForce.cpp
View file @
71d33617
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors.
*
* Portions copyright (c) 2010
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -96,3 +96,6 @@ ForceImpl* CMAPTorsionForce::createImpl() const {
return
new
CMAPTorsionForceImpl
(
*
this
);
}
void
CMAPTorsionForce
::
updateParametersInContext
(
Context
&
context
)
{
dynamic_cast
<
CMAPTorsionForceImpl
&>
(
getImplInContext
(
context
)).
updateParametersInContext
(
getContextImpl
(
context
));
}
openmmapi/src/CMAPTorsionForceImpl.cpp
View file @
71d33617
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors.
*
* Portions copyright (c) 2010
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -154,4 +154,8 @@ void CMAPTorsionForceImpl::calcMapDerivatives(int size, const vector<double>& en
}
}
}
}
\ No newline at end of file
}
void
CMAPTorsionForceImpl
::
updateParametersInContext
(
ContextImpl
&
context
)
{
kernel
.
getAs
<
CalcCMAPTorsionForceKernel
>
().
copyParametersToContext
(
context
,
owner
);
}
platforms/cpu/include/CpuNeighborList.h
View file @
71d33617
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -61,6 +61,7 @@ public:
private:
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
float
>
sortedPositions
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
// The following variables are used to make information accessible to the individual threads.
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
View file @
71d33617
...
...
@@ -56,8 +56,8 @@ protected:
/**
* Templatized implementation of calculateBlockIxn.
*/
template
<
bool
TRICLINIC
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
...
...
@@ -74,15 +74,15 @@ protected:
/**
* Templatized implementation of calculateBlockEwaldIxn.
*/
template
<
bool
TRICLINIC
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template
<
bool
TRICLINIC
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
71d33617
...
...
@@ -55,8 +55,8 @@ protected:
/**
* Templatized implementation of calculateBlockIxn.
*/
template
<
bool
TRICLINIC
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
...
...
@@ -73,15 +73,15 @@ protected:
/**
* Templatized implementation of calculateBlockEwaldIxn.
*/
template
<
bool
TRICLINIC
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template
<
bool
TRICLINIC
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
71d33617
...
...
@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
}
void
CpuCustomManyParticleForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
assert
(
c
utoff
);
assert
(
useC
utoff
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
1
][
1
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
2
][
2
]
>=
2.0
*
cutoffDistance
);
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
71d33617
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -164,8 +164,8 @@ public:
return
VoxelIndex
(
y
,
z
);
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
float
*
atomLoca
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
vector
<
float
>&
blockAtomX
,
const
vector
<
float
>&
blockAtomY
,
const
vector
<
float
>&
blockAtomZ
,
const
vector
<
float
>&
sortedPosi
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
...
@@ -233,24 +233,34 @@ public:
float
minx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
fvec4
offset
(
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
),
0
);
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
atomLocations
[
4
*
blockAtoms
[
k
]];
fvec4
posVec
(
atomPos
);
fvec4
delta1
=
offset
-
posVec
;
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
float
offset
[
3
]
=
{
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
)};
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
fvec4
dist2
=
maxDistanceSquared
;
if
(
y
!=
atomVoxelIndex
[
k
].
y
)
{
fvec4
dy1
=
offset
[
1
]
-
fvec4
(
&
blockAtomY
[
k
]);
fvec4
dy2
=
dy1
+
voxelSizeY
;
if
(
usePeriodic
)
{
dy1
-=
round
(
dy1
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dy2
-=
round
(
dy2
*
invBoxSize
[
1
])
*
boxSize
[
1
];
}
fvec4
dy
=
min
(
abs
(
dy1
),
abs
(
dy2
));
dist2
-=
dy
*
dy
;
}
if
(
z
!=
atomVoxelIndex
[
k
].
z
)
{
fvec4
dz1
=
offset
[
2
]
-
fvec4
(
&
blockAtomZ
[
k
]);
fvec4
dz2
=
dz1
+
voxelSizeZ
;
if
(
usePeriodic
)
{
dz1
-=
round
(
dz1
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz2
-=
round
(
dz2
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
fvec4
dz
=
min
(
abs
(
dz1
),
abs
(
dz2
));
dist2
-=
dz
*
dz
;
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dz
=
(
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
xoffset
);
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
xoffset
);
fvec4
dist
=
sqrt
(
dist2
);
int
numToCheck
=
min
(
4
,
(
int
)
(
blockAtoms
.
size
()
-
k
));
for
(
int
m
=
0
;
m
<
numToCheck
;
m
++
)
{
minx
=
min
(
minx
,
blockAtomX
[
k
+
m
]
-
dist
[
m
]
-
xoffset
);
maxx
=
max
(
maxx
,
blockAtomX
[
k
+
m
]
+
dist
[
m
]
-
xoffset
);
}
}
if
(
minx
==
maxx
)
...
...
@@ -290,12 +300,10 @@ public:
if
(
sortedIndex
>=
lastSortedIndex
)
continue
;
fvec4
atomPos
(
atomLocations
+
4
*
sortedAtoms
[
sortedIndex
]);
fvec4
atomPos
(
&
sortedPositions
[
4
*
sortedIndex
]);
fvec4
delta
=
atomPos
-
centerPos
;
if
(
periodicRectangular
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
if
(
periodicRectangular
)
delta
-=
round
(
delta
*
invBoxSize
)
*
boxSize
;
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
...
...
@@ -310,26 +318,34 @@ public:
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
bool
any
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
delta
=
atomPos
-
pos1
;
bool
anyInteraction
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
fvec4
dx
=
fvec4
(
&
blockAtomX
[
k
])
-
atomPos
[
0
];
fvec4
dy
=
fvec4
(
&
blockAtomY
[
k
])
-
atomPos
[
1
];
fvec4
dz
=
fvec4
(
&
blockAtomZ
[
k
])
-
atomPos
[
2
];
if
(
periodicRectangular
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
f
loat
r2
=
d
ot3
(
delta
,
delta
)
;
if
(
r2
<
maxDistanceSquared
)
{
any
=
true
;
f
vec4
r2
=
d
x
*
dx
+
dy
*
dy
+
dz
*
dz
;
if
(
any
(
r2
<
maxDistanceSquared
)
)
{
any
Interaction
=
true
;
break
;
}
}
if
(
!
any
)
if
(
!
any
Interaction
)
continue
;
}
...
...
@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedPositions
.
resize
(
4
*
numAtoms
);
// Record the parameters for the threads.
...
...
@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
fvec4
atomPos
(
&
atomLocations
[
4
*
atomIndex
]);
atomPos
.
store
(
&
sortedPositions
[
4
*
i
]);
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
}
voxels
.
sortItems
();
...
...
@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
float
>
blockAtomX
(
blockSize
),
blockAtomY
(
blockSize
),
blockAtomZ
(
blockSize
);
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
...
...
@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]
]
);
fvec4
minPos
(
&
sortedPositions
[
4
*
firstIndex
]);
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]
]);
fvec4
pos
(
&
sortedPositions
[
4
*
(
firstIndex
+
j
)
]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtomX
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)];
blockAtomY
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
1
];
blockAtomZ
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
2
];
}
for
(
int
j
=
atomsInBlock
;
j
<
blockSize
;
j
++
)
{
blockAtomX
[
j
]
=
1e10
;
blockAtomY
[
j
]
=
1e10
;
blockAtomZ
[
j
]
=
1e10
;
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
sortedPositions
,
atomVoxelIndex
);
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
71d33617
...
...
@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
...
...
@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
...
...
@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
r
=
r2
*
inverseR
;
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
...
@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
...
...
@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
...
...
@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
inverseR
=
r
sqrt
(
r2
);
fvec4
r
=
r2
*
inverseR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
TRICLINIC
)
{
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
71d33617
...
...
@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
*/
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
...
...
@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
...
...
@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
...
...
@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
inverseR
=
rsqrt
(
r2
);
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
r
=
r2
*
inverseR
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
...
@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
...
...
@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
...
...
@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
inverseR
=
r
sqrt
(
r2
);
fvec8
r
=
r2
*
inverseR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
...
...
@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
...
...
@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
template
<
bool
TRICLINIC
>
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
TRICLINIC
)
{
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
...
...
platforms/cuda/include/CudaKernels.h
View file @
71d33617
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -520,11 +520,19 @@ public:
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
private:
int
numTorsions
;
bool
hasInitializedKernel
;
CudaContext
&
cu
;
const
System
&
system
;
std
::
vector
<
int2
>
mapPositionsVec
;
CudaArray
*
coefficients
;
CudaArray
*
mapPositions
;
CudaArray
*
torsionMaps
;
...
...
@@ -1274,7 +1282,8 @@ private:
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordChangedParameters
(
ContextImpl
&
context
);
CudaContext
&
cu
;
double
prevStepSize
;
double
prevStepSize
,
energy
;
float
energyFloat
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
...
...
@@ -1295,7 +1304,7 @@ private:
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
CUfunction
sumPotentialEnergyKernel
,
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
CUfunction
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
71d33617
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -345,6 +345,13 @@ public:
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
private:
class
Task
;
CudaPlatform
::
PlatformData
&
data
;
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
71d33617
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -32,6 +32,7 @@
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "CudaExpressionUtilities.h"
#include "ReferenceCCMAAlgorithm.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
...
...
@@ -304,157 +305,54 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
int
numCCMA
=
(
int
)
ccmaConstraints
.
size
();
if
(
numCCMA
>
0
)
{
vector
<
vector
<
int
>
>
atomConstraints
(
context
.
getNumAtoms
());
// Record information needed by ReferenceCCMAAlgorithm.
vector
<
pair
<
int
,
int
>
>
refIndices
(
numCCMA
);
vector
<
RealOpenMM
>
refDistance
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
atomConstraints
[
atom1
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
int
index
=
ccmaConstraints
[
i
];
refIndices
[
i
]
=
make_pair
(
atom1
[
index
],
atom2
[
index
]);
refDistance
[
i
]
=
distance
[
index
];
}
vector
<
vector
<
int
>
>
linkedConstraints
(
numCCMA
);
for
(
unsigned
atom
=
0
;
atom
<
atomConstraints
.
size
();
atom
++
)
{
for
(
unsigned
i
=
0
;
i
<
atomConstraints
[
atom
].
size
();
i
++
)
for
(
unsigned
j
=
0
;
j
<
i
;
j
++
)
{
int
c1
=
atomConstraints
[
atom
][
i
];
int
c2
=
atomConstraints
[
atom
][
j
];
linkedConstraints
[
c1
].
push_back
(
c2
);
linkedConstraints
[
c2
].
push_back
(
c1
);
vector
<
RealOpenMM
>
refMasses
(
numAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
++
i
)
refMasses
[
i
]
=
(
RealOpenMM
)
system
.
getParticleMass
(
i
);
// Look up angles for CCMA.
vector
<
ReferenceCCMAAlgorithm
::
AngleInfo
>
angles
;
for
(
int
i
=
0
;
i
<
system
.
getNumForces
();
i
++
)
{
const
HarmonicAngleForce
*
force
=
dynamic_cast
<
const
HarmonicAngleForce
*>
(
&
system
.
getForce
(
i
));
if
(
force
!=
NULL
)
{
for
(
int
j
=
0
;
j
<
force
->
getNumAngles
();
j
++
)
{
int
atom1
,
atom2
,
atom3
;
double
angle
,
k
;
force
->
getAngleParameters
(
j
,
atom1
,
atom2
,
atom3
,
angle
,
k
);
angles
.
push_back
(
ReferenceCCMAAlgorithm
::
AngleInfo
(
atom1
,
atom2
,
atom3
,
(
RealOpenMM
)
angle
));
}
}
int
maxLinks
=
0
;
for
(
unsigned
i
=
0
;
i
<
linkedConstraints
.
size
();
i
++
)
maxLinks
=
max
(
maxLinks
,
(
int
)
linkedConstraints
[
i
].
size
());
int
maxAtomConstraints
=
0
;
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Compute the constraint coupling matrix
vector
<
vector
<
int
>
>
atomAngles
(
numAtoms
);
HarmonicAngleForce
const
*
angleForce
=
NULL
;
for
(
int
i
=
0
;
i
<
system
.
getNumForces
()
&&
angleForce
==
NULL
;
i
++
)
angleForce
=
dynamic_cast
<
HarmonicAngleForce
const
*>
(
&
system
.
getForce
(
i
));
if
(
angleForce
!=
NULL
)
for
(
int
i
=
0
;
i
<
angleForce
->
getNumAngles
();
i
++
)
{
int
particle1
,
particle2
,
particle3
;
double
angle
,
k
;
angleForce
->
getAngleParameters
(
i
,
particle1
,
particle2
,
particle3
,
angle
,
k
);
atomAngles
[
particle2
].
push_back
(
i
);
}
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
(
numCCMA
);
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
{
for
(
int
k
=
0
;
k
<
numCCMA
;
k
++
)
{
if
(
j
==
k
)
{
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
j
,
1.0
));
continue
;
}
double
scale
;
int
cj
=
ccmaConstraints
[
j
];
int
ck
=
ccmaConstraints
[
k
];
int
atomj0
=
atom1
[
cj
];
int
atomj1
=
atom2
[
cj
];
int
atomk0
=
atom1
[
ck
];
int
atomk1
=
atom2
[
ck
];
int
atoma
,
atomb
,
atomc
;
double
imj0
=
1.0
/
system
.
getParticleMass
(
atomj0
);
double
imj1
=
1.0
/
system
.
getParticleMass
(
atomj1
);
if
(
atomj0
==
atomk0
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk1
;
scale
=
imj0
/
(
imj0
+
imj1
);
}
else
if
(
atomj1
==
atomk1
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk0
;
scale
=
imj1
/
(
imj0
+
imj1
);
}
else
if
(
atomj0
==
atomk1
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk0
;
scale
=
imj0
/
(
imj0
+
imj1
);
}
else
if
(
atomj1
==
atomk0
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk1
;
scale
=
imj1
/
(
imj0
+
imj1
);
}
else
continue
;
// These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool
foundConstraint
=
false
;
for
(
int
m
=
0
;
m
<
numCCMA
;
m
++
)
{
int
other
=
ccmaConstraints
[
m
];
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
double
d1
=
distance
[
cj
];
double
d2
=
distance
[
ck
];
double
d3
=
distance
[
other
];
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
foundConstraint
=
true
;
break
;
}
}
if
(
!
foundConstraint
&&
angleForce
!=
NULL
)
{
// We didn't find one, so look for an angle force field term.
const
vector
<
int
>&
angleCandidates
=
atomAngles
[
atomb
];
for
(
vector
<
int
>::
const_iterator
iter
=
angleCandidates
.
begin
();
iter
!=
angleCandidates
.
end
();
iter
++
)
{
int
particle1
,
particle2
,
particle3
;
double
angle
,
ka
;
angleForce
->
getAngleParameters
(
*
iter
,
particle1
,
particle2
,
particle3
,
angle
,
ka
);
if
((
particle1
==
atoma
&&
particle3
==
atomc
)
||
(
particle3
==
atoma
&&
particle1
==
atomc
))
{
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
cos
(
angle
)));
break
;
}
}
}
}
}
// Invert it using QR.
vector
<
int
>
matrixRowStart
;
vector
<
int
>
matrixColIndex
;
vector
<
double
>
matrixValue
;
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
matrixRowStart
.
push_back
(
matrixValue
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
j
++
)
{
pair
<
int
,
double
>
element
=
matrix
[
i
][
j
];
matrixColIndex
.
push_back
(
element
.
first
);
matrixValue
.
push_back
(
element
.
second
);
}
}
matrixRowStart
.
push_back
(
matrixValue
.
size
());
int
*
qRowStart
,
*
qColIndex
,
*
rRowStart
,
*
rColIndex
;
double
*
qValue
,
*
rValue
;
int
result
=
QUERN_compute_qr
(
numCCMA
,
numCCMA
,
&
matrixRowStart
[
0
],
&
matrixColIndex
[
0
],
&
matrixValue
[
0
],
NULL
,
&
qRowStart
,
&
qColIndex
,
&
qValue
,
&
rRowStart
,
&
rColIndex
,
&
rValue
);
vector
<
double
>
rhs
(
numCCMA
);
matrix
.
clear
();
matrix
.
resize
(
numCCMA
);
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
result
=
QUERN_multiply_with_q_transpose
(
numCCMA
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numCCMA
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numCCMA
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
ccmaConstraints
[
i
]]
/
distance
[
ccmaConstraints
[
j
]];
if
(
abs
(
value
)
>
0.1
)
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
}
}
QUERN_free_result
(
qRowStart
,
qColIndex
,
qValue
);
QUERN_free_result
(
rRowStart
,
rColIndex
,
rValue
);
// Create a ReferenceCCMAAlgorithm. It will build and invert the constraint matrix for us.
ReferenceCCMAAlgorithm
ccma
(
numAtoms
,
numCCMA
,
refIndices
,
refDistance
,
refMasses
,
angles
,
0.1
);
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
=
ccma
.
getMatrix
();
int
maxRowElements
=
0
;
for
(
unsigned
i
=
0
;
i
<
matrix
.
size
();
i
++
)
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
// Build the list of constraints for each atom.
vector
<
vector
<
int
>
>
atomConstraints
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
atomConstraints
[
atom1
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
ccmaConstraints
[
i
]]].
push_back
(
i
);
}
int
maxAtomConstraints
=
0
;
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Sort the constraints.
vector
<
int
>
constraintOrder
(
numCCMA
);
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
71d33617
...
...
@@ -1127,7 +1127,7 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
return
;
int
numMaps
=
force
.
getNumMaps
();
vector
<
float4
>
coeffVec
;
vector
<
int2
>
mapPositionsVec
(
numMaps
);
mapPositionsVec
.
resize
(
numMaps
);
vector
<
double
>
energy
;
vector
<
vector
<
double
>
>
c
;
int
currentPosition
=
0
;
...
...
@@ -1166,6 +1166,49 @@ double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includ
return
0.0
;
}
void
CudaCalcCMAPTorsionForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
)
{
int
numMaps
=
force
.
getNumMaps
();
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cu
.
getContextIndex
()
*
force
.
getNumTorsions
()
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
force
.
getNumTorsions
()
/
numContexts
;
numTorsions
=
endIndex
-
startIndex
;
if
(
mapPositions
->
getSize
()
!=
numMaps
)
throw
OpenMMException
(
"updateParametersInContext: The number of maps has changed"
);
if
(
torsionMaps
->
getSize
()
!=
numTorsions
)
throw
OpenMMException
(
"updateParametersInContext: The number of CMAP torsions has changed"
);
// Update the maps.
vector
<
float4
>
coeffVec
;
vector
<
double
>
energy
;
vector
<
vector
<
double
>
>
c
;
int
currentPosition
=
0
;
for
(
int
i
=
0
;
i
<
numMaps
;
i
++
)
{
int
size
;
force
.
getMapParameters
(
i
,
size
,
energy
);
if
(
size
!=
mapPositionsVec
[
i
].
y
)
throw
OpenMMException
(
"updateParametersInContext: The size of a map has changed"
);
CMAPTorsionForceImpl
::
calcMapDerivatives
(
size
,
energy
,
c
);
currentPosition
+=
4
*
size
*
size
;
for
(
int
j
=
0
;
j
<
size
*
size
;
j
++
)
{
coeffVec
.
push_back
(
make_float4
((
float
)
c
[
j
][
0
],
(
float
)
c
[
j
][
1
],
(
float
)
c
[
j
][
2
],
(
float
)
c
[
j
][
3
]));
coeffVec
.
push_back
(
make_float4
((
float
)
c
[
j
][
4
],
(
float
)
c
[
j
][
5
],
(
float
)
c
[
j
][
6
],
(
float
)
c
[
j
][
7
]));
coeffVec
.
push_back
(
make_float4
((
float
)
c
[
j
][
8
],
(
float
)
c
[
j
][
9
],
(
float
)
c
[
j
][
10
],
(
float
)
c
[
j
][
11
]));
coeffVec
.
push_back
(
make_float4
((
float
)
c
[
j
][
12
],
(
float
)
c
[
j
][
13
],
(
float
)
c
[
j
][
14
],
(
float
)
c
[
j
][
15
]));
}
}
coefficients
->
upload
(
coeffVec
);
// Update the indices.
vector
<
int
>
torsionMapsVec
(
numTorsions
);
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
{
int
index
[
8
];
force
.
getTorsionParameters
(
i
,
torsionMapsVec
[
i
],
index
[
0
],
index
[
1
],
index
[
2
],
index
[
3
],
index
[
4
],
index
[
5
],
index
[
6
],
index
[
7
]);
}
torsionMaps
->
upload
(
torsionMapsVec
);
}
class
CudaCustomTorsionForceInfo
:
public
CudaForceInfo
{
public:
CudaCustomTorsionForceInfo
(
const
CustomTorsionForce
&
force
)
:
force
(
force
)
{
...
...
@@ -5688,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
variables
[
"dt"
]
=
"dt[0].y"
;
variables
[
"uniform"
]
=
"uniform"
;
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
energyName
]
=
"energy
[0]
"
;
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
...
...
@@ -5724,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables
[
"m"
]
=
"mass"
;
variables
[
"dt"
]
=
"stepSize"
;
if
(
energyName
!=
""
)
variables
[
energyName
]
=
"energy
[0]
"
;
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
...
...
@@ -5957,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
args1
.
push_back
(
&
energy
);
else
args1
.
push_back
(
&
energyFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kernelArgs
[
step
].
push_back
(
args1
);
...
...
@@ -6006,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args
.
push_back
(
NULL
);
args
.
push_back
(
NULL
);
args
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
args
.
push_back
(
&
energy
);
else
args
.
push_back
(
&
energyFloat
);
kernelArgs
[
step
].
push_back
(
args
);
}
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
{
...
...
@@ -6046,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
cu
.
getEnergyBuffer
().
getSize
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
sumPotentialEnergyKernel
=
cu
.
getKernel
(
module
,
cu
.
getUseDoublePrecision
()
?
"computeDoubleSum"
:
"computeFloatSum"
);
// Create the kernel for computing kinetic energy.
stringstream
computeKE
;
...
...
@@ -6074,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorPerDof
,
replacements
),
defines
);
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorPerDof
,
replacements
),
defines
);
kineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computePerDof"
);
kineticEnergyArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
NULL
);
...
...
@@ -6091,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
kineticEnergyArgs
.
push_back
(
&
energy
);
else
kineticEnergyArgs
.
push_back
(
&
energyFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
kineticEnergyArgs
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
keNeedsForce
=
usesVariable
(
keExpression
,
"f"
);
...
...
@@ -6197,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
else
{
recordChangedParameters
(
context
);
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
if
(
computeEnergy
)
{
void
*
args
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
potentialEnergy
->
getDevicePointer
()};
cu
.
executeKernel
(
sumPotentialEnergyKernel
,
&
args
[
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
energyFloat
=
(
float
)
energy
;
}
forcesAreValid
=
true
;
}
...
...
@@ -6272,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
bool
willNeedEnergy
=
false
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumComputations
();
i
++
)
willNeedEnergy
|=
needsEnergy
[
i
];
context
.
calcForcesAndEnergy
(
true
,
willNeedEnergy
,
-
1
);
if
(
willNeedEnergy
)
{
void
*
args
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
potentialEnergy
->
getDevicePointer
()};
cu
.
executeKernel
(
sumPotentialEnergyKernel
,
&
args
[
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
energy
=
context
.
calcForcesAndEnergy
(
true
,
willNeedEnergy
,
-
1
);
energyFloat
=
(
float
)
energy
;
forcesAreValid
=
true
;
}
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
71d33617
...
...
@@ -536,6 +536,11 @@ double CudaParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, boo
return
0.0
;
}
void
CudaParallelCalcCMAPTorsionForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
copyParametersToContext
(
context
,
force
);
}
class
CudaParallelCalcCustomTorsionForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
Task
(
ContextImpl
&
context
,
CudaCalcCustomTorsionForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/cuda/src/kernels/customIntegratorGlobal.cu
View file @
71d33617
extern
"C"
__global__
void
computeGlobal
(
mixed2
*
__restrict__
dt
,
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
params
,
float
uniform
,
float
gaussian
,
const
real
*
__restrict__
energy
)
{
float
uniform
,
float
gaussian
,
const
real
energy
)
{
COMPUTE_STEP
}
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
71d33617
...
...
@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
View file @
71d33617
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -105,11 +105,68 @@ void testCMAPTorsions() {
}
}
void
testChangingParameters
()
{
// Create a system with two maps and one torsion.
const
int
mapSize
=
8
;
System
system
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
system
.
addParticle
(
1.0
);
CMAPTorsionForce
*
cmap
=
new
CMAPTorsionForce
();
vector
<
double
>
mapEnergy1
(
mapSize
*
mapSize
);
vector
<
double
>
mapEnergy2
(
mapSize
*
mapSize
);
for
(
int
i
=
0
;
i
<
mapSize
;
i
++
)
{
double
angle1
=
i
*
2
*
M_PI
/
mapSize
;
double
energy1
=
cos
(
angle1
);
for
(
int
j
=
0
;
j
<
mapSize
;
j
++
)
{
double
angle2
=
j
*
2
*
M_PI
/
mapSize
;
double
energy2
=
10
*
sin
(
angle2
);
mapEnergy1
[
i
+
j
*
mapSize
]
=
energy1
+
energy2
;
mapEnergy2
[
i
+
j
*
mapSize
]
=
energy1
-
energy2
;
}
}
cmap
->
addMap
(
mapSize
,
mapEnergy1
);
cmap
->
addMap
(
mapSize
,
mapEnergy2
);
cmap
->
addTorsion
(
0
,
0
,
1
,
2
,
3
,
1
,
2
,
3
,
4
);
system
.
addForce
(
cmap
);
// Set particle positions so angle1=0 and angle2=PI/4.
vector
<
Vec3
>
positions
(
5
);
positions
[
0
]
=
Vec3
(
0
,
0
,
1
);
positions
[
1
]
=
Vec3
(
0
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
0
,
0
);
positions
[
3
]
=
Vec3
(
1
,
0
,
1
);
positions
[
4
]
=
Vec3
(
0.5
,
-
0.5
,
1
);
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// Check that the energy is correct.
double
energy
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
1
+
10
*
sin
(
M_PI
/
4
),
energy
,
1e-5
);
// Modify the parameters.
cmap
->
setTorsionParameters
(
0
,
1
,
0
,
1
,
2
,
3
,
1
,
2
,
3
,
4
);
for
(
int
i
=
0
;
i
<
mapSize
*
mapSize
;
i
++
)
mapEnergy2
[
i
]
*=
2.0
;
cmap
->
setMapParameters
(
1
,
mapSize
,
mapEnergy2
);
cmap
->
updateParametersInContext
(
context
);
// See if the results are correct.
energy
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
2
-
20
*
sin
(
M_PI
/
4
),
energy
,
1e-5
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testCMAPTorsions
();
testChangingParameters
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/opencl/include/OpenCLFFT3D.h
View file @
71d33617
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -40,7 +40,7 @@ namespace OpenMM {
* 15, 207–228 (2000).
* <p>
* This class places certain restrictions on the allowed dimensions of the grid. First,
* the size of each dimension may have no prime factors other than 2, 3, and
5
. You
* the size of each dimension may have no prime factors other than 2, 3,
5,
and
7
. You
* can call findLegalDimension() to determine the smallest size that satisfies this
* requirement and is greater than or equal to a specified minimum size. Second, the size
* of each dimension must be small enough to compute each 1D transform entirely in local
...
...
@@ -61,12 +61,17 @@ public:
* @param xsize the first dimension of the data sets on which FFTs will be performed
* @param ysize the second dimension of the data sets on which FFTs will be performed
* @param zsize the third dimension of the data sets on which FFTs will be performed
* @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex.
*/
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
);
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
,
bool
realToComplex
=
false
);
/**
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents
* are destroyed.
* are destroyed. This also means that both arrays must be large enough to hold complex values,
* even when performing a real-to-complex transform.
* <p>
* When performing a real-to-complex transform, the output data is of size xsize*ysize*(zsize/2+1)
* and contains only the non-redundant elements.
*
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param out on exit, this contains the transformed data
...
...
@@ -75,17 +80,20 @@ public:
void
execFFT
(
OpenCLArray
&
in
,
OpenCLArray
&
out
,
bool
forward
=
true
);
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and
5
).
* factors other than 2, 3,
5,
and
7
).
*
* @param minimum the minimum size the return value must be greater than or equal to
*/
static
int
findLegalDimension
(
int
minimum
);
private:
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
);
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
,
int
axis
,
bool
forward
,
bool
inputIsReal
);
int
xsize
,
ysize
,
zsize
;
int
xthreads
,
ythreads
,
zthreads
;
bool
packRealAsComplex
;
OpenCLContext
&
context
;
cl
::
Kernel
xkernel
,
ykernel
,
zkernel
;
cl
::
Kernel
invxkernel
,
invykernel
,
invzkernel
;
cl
::
Kernel
packForwardKernel
,
unpackForwardKernel
,
packBackwardKernel
,
unpackBackwardKernel
;
};
}
// namespace OpenMM
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
71d33617
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -498,11 +498,19 @@ public:
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CMAPTorsionForce
&
force
);
private:
int
numTorsions
;
bool
hasInitializedKernel
;
OpenCLContext
&
cl
;
const
System
&
system
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
OpenCLArray
*
coefficients
;
OpenCLArray
*
mapPositions
;
OpenCLArray
*
torsionMaps
;
...
...
@@ -631,6 +639,7 @@ private:
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
...
...
@@ -1264,7 +1273,7 @@ private:
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordChangedParameters
(
ContextImpl
&
context
);
OpenCLContext
&
cl
;
double
prevStepSize
;
double
prevStepSize
,
energy
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
...
...
@@ -1284,7 +1293,7 @@ private:
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
float
>
contextValues
;
std
::
vector
<
std
::
vector
<
cl
::
Kernel
>
>
kernels
;
cl
::
Kernel
sumPotentialEnergyKernel
,
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
cl
::
Kernel
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment