Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0b96aa2e
Commit
0b96aa2e
authored
Oct 28, 2010
by
Peter Eastman
Browse files
Fixes and optimizations to PME on CPU
parent
e28ce558
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
175 additions
and
12 deletions
+175
-12
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+15
-6
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+0
-6
platforms/opencl/src/kernels/pme_cpu.cl
platforms/opencl/src/kernels/pme_cpu.cl
+158
-0
No files found.
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
0b96aa2e
...
@@ -43,6 +43,8 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize
...
@@ -43,6 +43,8 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize
void
OpenCLFFT3D
::
execFFT
(
OpenCLArray
<
mm_float2
>&
data
,
bool
forward
)
{
void
OpenCLFFT3D
::
execFFT
(
OpenCLArray
<
mm_float2
>&
data
,
bool
forward
)
{
int
maxSize
=
xkernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
());
int
maxSize
=
xkernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
());
if
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
)
maxSize
=
1
;
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl
::
Buffer
>
(
0
,
data
.
getDeviceBuffer
());
xkernel
.
setArg
<
cl_float
>
(
1
,
forward
?
1.0
f
:
-
1.0
f
);
xkernel
.
setArg
<
cl_float
>
(
1
,
forward
?
1.0
f
:
-
1.0
f
);
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
context
.
executeKernel
(
xkernel
,
xsize
*
ysize
*
zsize
,
min
(
xsize
,
(
int
)
maxSize
));
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
0b96aa2e
...
@@ -1361,6 +1361,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1361,6 +1361,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
double
OpenCLCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
OpenCLCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
if
(
!
hasInitializedKernel
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
hasInitializedKernel
=
true
;
if
(
exceptionIndices
!=
NULL
)
{
if
(
exceptionIndices
!=
NULL
)
{
...
@@ -1379,7 +1380,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1379,7 +1380,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cosSinSums
->
getDeviceBuffer
());
ewaldForcesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cosSinSums
->
getDeviceBuffer
());
}
}
if
(
pmeGrid
!=
NULL
)
{
if
(
pmeGrid
!=
NULL
)
{
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
string
file
=
(
deviceIsCpu
?
OpenCLKernelSources
::
pme_cpu
:
OpenCLKernelSources
::
pme
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
pmeDefines
);
pmeUpdateBsplinesKernel
=
cl
::
Kernel
(
program
,
"updateBsplines"
);
pmeUpdateBsplinesKernel
=
cl
::
Kernel
(
program
,
"updateBsplines"
);
pmeAtomRangeKernel
=
cl
::
Kernel
(
program
,
"findAtomRangeForGrid"
);
pmeAtomRangeKernel
=
cl
::
Kernel
(
program
,
"findAtomRangeForGrid"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
...
@@ -1429,11 +1431,18 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1429,11 +1431,18 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
if
(
deviceIsCpu
)
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
5
,
boxSize
);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
invBoxSize
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
}
else
{
sort
->
sort
(
*
pmeAtomGridIndex
);
sort
->
sort
(
*
pmeAtomGridIndex
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
3
,
boxSize
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
pmeAtomRangeKernel
.
setArg
<
mm_float4
>
(
4
,
invBoxSize
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
fft
->
execFFT
(
*
pmeGrid
,
true
);
fft
->
execFFT
(
*
pmeGrid
,
true
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
)));
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
)));
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
0b96aa2e
...
@@ -91,7 +91,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -91,7 +91,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
...
@@ -123,9 +122,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -123,9 +122,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#
endif
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
}
}
#
endif
#
ifdef
USE_EXCLUSIONS
}
#
endif
#
endif
excl
>>=
1
;
excl
>>=
1
;
}
}
...
@@ -219,7 +215,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -219,7 +215,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
if
(
!isExcluded
)
{
#
endif
#
endif
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
posq2
=
(
float4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
...
@@ -260,7 +255,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -260,7 +255,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
}
}
#
endif
#
endif
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
}
excl
>>=
1
;
excl
>>=
1
;
#
endif
#
endif
}
}
...
...
platforms/opencl/src/kernels/pme_cpu.cl
0 → 100644
View file @
0b96aa2e
__kernel
void
updateBsplines
(
__global
float4*
posq,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__local
float4*
bsplinesCache,
__global
int2*
pmeAtomGridIndex,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
const
float4
scale
=
1.0f/
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
float4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
float4*
ddata
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER
+
get_local_size
(
0
)
*PME_ORDER]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j]
=
0.0f
;
ddata[j]
=
0.0f
;
}
float4
pos
=
posq[i]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float4
t
=
(
float4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
float4
dr
=
(
float4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0.0f
)
;
data[PME_ORDER-1]
=
0.0f
;
data[1]
=
dr
;
data[0]
=
1.0f-dr
;
for
(
int
j
=
3
; j < PME_ORDER; j++) {
float
div
=
1.0f/
(
j-1.0f
)
;
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
float4
)
k
)
*data[j-k-2]
+
(
-dr+
(
float4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
}
ddata[0]
=
-data[0]
;
for
(
int
j
=
1
; j < PME_ORDER; j++)
ddata[j]
=
data[j-1]-data[j]
;
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
float4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
float4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
pmeBsplineDTheta[i+j*NUM_ATOMS]
=
ddata[j]
;
}
}
}
/**
*
This
kernel
is
not
actually
used
when
running
on
a
CPU.
*/
__kernel
void
findAtomRangeForGrid
(
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float4*
posq,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
}
__kernel
void
gridSpreadCharge
(
__global
float4*
posq,
__global
int2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float2*
pmeGrid,
__global
float4*
pmeBsplineTheta,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
for
(
int
gridIndex
=
firstx*GRID_SIZE_Y*GRID_SIZE_Z
; gridIndex < lastx*GRID_SIZE_Y*GRID_SIZE_Z; gridIndex++)
pmeGrid[gridIndex]
=
(
float2
)
(
0.0f,
0.0f
)
;
for
(
int
atom
=
0
; atom < NUM_ATOMS; atom++) {
float4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float4
t
=
(
float4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
float4
dr
=
(
float4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0.0f
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
float
atomCharge
=
pos.w*EPSILON_FACTOR
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
if
(
xindex
<
firstx
||
xindex
>=
lastx
)
continue
;
for
(
int
iy
=
0
; iy < PME_ORDER; iy++) {
int
yindex
=
gridIndex.y+iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
)
;
for
(
int
iz
=
0
; iz < PME_ORDER; iz++) {
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
pmeGrid[index].x
+=
atomCharge*pmeBsplineTheta[atom+ix*NUM_ATOMS].x*pmeBsplineTheta[atom+iy*NUM_ATOMS].y*pmeBsplineTheta[atom+iz*NUM_ATOMS].z
;
}
}
}
}
}
__kernel
void
reciprocalConvolution
(
__global
float2*
pmeGrid,
__global
float*
energyBuffer,
__global
float*
pmeBsplineModuliX,
__global
float*
pmeBsplineModuliY,
__global
float*
pmeBsplineModuliZ,
float4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
energy
=
0.0f
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
int
kx
=
index/
(
GRID_SIZE_Y*GRID_SIZE_Z
)
;
int
remainder
=
index-kx*GRID_SIZE_Y*GRID_SIZE_Z
;
int
ky
=
remainder/GRID_SIZE_Z
;
int
kz
=
remainder-ky*GRID_SIZE_Z
;
if
(
kx
==
0
&&
ky
==
0
&&
kz
==
0
)
continue
;
int
mx
=
(
kx
<
(
GRID_SIZE_X+1
)
/2
)
?
kx
:
(
kx-GRID_SIZE_X
)
;
int
my
=
(
ky
<
(
GRID_SIZE_Y+1
)
/2
)
?
ky
:
(
ky-GRID_SIZE_Y
)
;
int
mz
=
(
kz
<
(
GRID_SIZE_Z+1
)
/2
)
?
kz
:
(
kz-GRID_SIZE_Z
)
;
float
mhx
=
mx*invPeriodicBoxSize.x
;
float
mhy
=
my*invPeriodicBoxSize.y
;
float
mhz
=
mz*invPeriodicBoxSize.z
;
float
bx
=
pmeBsplineModuliX[kx]
;
float
by
=
pmeBsplineModuliY[ky]
;
float
bz
=
pmeBsplineModuliZ[kz]
;
float2
grid
=
pmeGrid[index]
;
float
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
float
denom
=
m2*bx*by*bz
;
float
eterm
=
recipScaleFactor*EXP
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
pmeGrid[index]
=
(
float2
)
(
grid.x*eterm,
grid.y*eterm
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
__kernel
void
gridInterpolateForce
(
__global
float4*
posq,
__global
float4*
forceBuffers,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__global
float2*
pmeGrid,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0)) {
float4
force
=
0.0f
;
float4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float4
t
=
(
float4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
float
tx
=
pmeBsplineTheta[atom+ix*NUM_ATOMS].x
;
float
dtx
=
pmeBsplineDTheta[atom+ix*NUM_ATOMS].x
;
for
(
int
iy
=
0
; iy < PME_ORDER; iy++) {
int
yindex
=
gridIndex.y+iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
)
;
float
ty
=
pmeBsplineTheta[atom+iy*NUM_ATOMS].y
;
float
dty
=
pmeBsplineDTheta[atom+iy*NUM_ATOMS].y
;
for
(
int
iz
=
0
; iz < PME_ORDER; iz++) {
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
float
tz
=
pmeBsplineTheta[atom+iz*NUM_ATOMS].z
;
float
dtz
=
pmeBsplineDTheta[atom+iz*NUM_ATOMS].z
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
float
gridvalue
=
pmeGrid[index].x
;
force.x
+=
dtx*ty*tz*gridvalue
;
force.y
+=
tx*dty*tz*gridvalue
;
force.z
+=
tx*ty*dtz*gridvalue
;
}
}
}
float4
totalForce
=
forceBuffers[atom]
;
float
q
=
pos.w*EPSILON_FACTOR
;
totalForce.x
-=
q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x
;
totalForce.y
-=
q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y
;
totalForce.z
-=
q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z
;
forceBuffers[atom]
=
totalForce
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment