Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a9054686
Commit
a9054686
authored
Oct 06, 2010
by
Mark Friedrichs
Browse files
Mods for direct PME
parent
01260070
Changes
18
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
833 additions
and
920 deletions
+833
-920
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
+18
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
+30
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+12
-0
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+31
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+3
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
...cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
+5
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+5
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+12
-12
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+46
-26
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+217
-375
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+58
-54
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+175
-336
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+50
-10
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+126
-101
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaMapTorques.cu
.../platforms/cuda/src/kernels/kCalculateAmoebaMapTorques.cu
+6
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+38
-0
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
...amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+0
-1
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
View file @
a9054686
...
...
@@ -42,6 +42,8 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
log
=
NULL
;
contextImpl
=
NULL
;
gpuInitialized
=
false
;
applyCutoff
=
0
;
multipoleForceCount
=
0
;
}
AmoebaCudaData
::~
AmoebaCudaData
()
{
...
...
@@ -122,5 +124,21 @@ void AmoebaCudaData::initializeGpu( void ) {
return
;
}
/**
 * Advance the multipole force counter by one.
 */
void AmoebaCudaData::incrementMultipoleForceCount( void ) {
    multipoleForceCount += 1;
}
/**
 * Report the current value of the multipole force counter.
 *
 * @return multipole force count
 */
int AmoebaCudaData::getMultipoleForceCount( void ) const {
    const int currentCount = multipoleForceCount;
    return currentCount;
}
void
AmoebaCudaData
::
setApplyCutoff
(
int
inputApplyCutoff
)
{
applyCutoff
=
inputApplyCutoff
;
}
int
AmoebaCudaData
::
getApplyCutoff
(
void
)
const
{
return
applyCutoff
;
}
}
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
View file @
a9054686
...
...
@@ -139,11 +139,41 @@ public:
*/
void
setContextImpl
(
void
*
contextImpl
);
/**
* Get multipole force count
*
* @return multipole force count
*/
int
getMultipoleForceCount
(
void
)
const
;
/**
* Increment multipole force count by one
*/
void
incrementMultipoleForceCount
(
void
);
/**
* Get apply-cutoff setting
*
* @return apply-cutoff setting
*/
int
getApplyCutoff
(
)
const
;
/**
* Set apply-cutoff setting
*
* @param applyCutoff new apply-cutoff setting
*/
void
setApplyCutoff
(
int
applyCutoff
);
private:
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
amoebaGpuContext
amoebaGpu
;
bool
hasAmoebaBonds
,
hasAmoebaGeneralizedKirkwood
,
hasAmoebaMultipole
;
int
multipoleForceCount
;
int
applyCutoff
;
KernelImpl
*
localForceKernel
;
unsigned
int
kernelCount
;
void
*
contextImpl
;
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
a9054686
...
...
@@ -669,6 +669,13 @@ double CudaCalcAmoebaTorsionTorsionForceKernel::execute(ContextImpl& context, bo
static
void
computeAmoebaMultipoleForce
(
AmoebaCudaData
&
data
)
{
amoebaGpuContext
gpu
=
data
.
getAmoebaGpu
();
if
(
data
.
getMultipoleForceCount
()
==
0
){
gpuCopyInteractingWorkUnit
(
gpu
);
}
if
(
data
.
getApplyCutoff
()
&&
(
data
.
getMultipoleForceCount
()
%
100
)
==
0
){
gpuReorderAtoms
(
gpu
->
gpuContext
);
}
data
.
incrementMultipoleForceCount
();
data
.
initializeGpu
();
if
(
0
&&
data
.
getLog
()
){
...
...
@@ -867,6 +874,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
zsize
=
pmeGridDimension
[
2
];
}
gpuSetAmoebaPMEParameters
(
data
.
getAmoebaGpu
(),
(
float
)
alpha
,
xsize
,
ysize
,
zsize
);
data
.
setApplyCutoff
(
1
);
amoebaGpuContext
amoebaGpu
=
data
.
getAmoebaGpu
();
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpu
->
sim
.
nonbondedCutoffSqr
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
gpu
->
sim
.
nonbondedMethod
=
PARTICLE_MESH_EWALD
;
}
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
a9054686
...
...
@@ -350,7 +350,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" sqrtPi %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
sqrtPi
);
(
void
)
fprintf
(
log
,
" alpha Ewald %15.7e
\n
"
,
gpu
->
sim
.
alphaEwald
);
(
void
)
fprintf
(
log
,
" PME grid dimensions %6d %6d %6d
\n
"
,
gpu
->
sim
.
pmeGridSize
.
x
,
gpu
->
sim
.
pmeGridSize
.
y
,
gpu
->
sim
.
pmeGridSize
.
z
);
(
void
)
fprintf
(
log
,
"
cutoffDistance2
%15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
cutoffDistance2
);
(
void
)
fprintf
(
log
,
"
nonbondedCutoffSqr
%15.7e
\n
"
,
gpu
->
sim
.
nonbondedCutoffSqr
);
(
void
)
fprintf
(
log
,
" electric %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
electric
);
(
void
)
fprintf
(
log
,
" box %15.7e %15.7e %15.7e
\n
"
,
gpu
->
sim
.
periodicBoxSizeX
,
gpu
->
sim
.
periodicBoxSizeY
,
gpu
->
sim
.
periodicBoxSizeZ
);
(
void
)
fprintf
(
log
,
" gkc %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
gkc
);
...
...
@@ -1554,7 +1554,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
AMOEBA_NO_CUTOFF
,
AMOEBA_PARTICLE_MESH_EWALD
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
sqrtPi
=
std
::
sqrt
(
3.14159265358
f
);
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
alphaEwald
;
...
...
@@ -4297,4 +4296,34 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
}
}
/**---------------------------------------------------------------------------------------

   Overwrite the gpuContext work-unit lists with the Amoeba work-unit list

   Downloads gpu->psInteractingWorkUnit, gpu->psWorkUnit and amoebaGpu->psWorkUnit to
   the host, copies the entries of amoebaGpu->psWorkUnit into both gpuContext streams,
   and uploads the two gpuContext streams back to the device.

   The log message marks this routine as temporary ("to be removed").

   @param amoebaGpu        amoebaGpuContext reference

   --------------------------------------------------------------------------------------- */

void gpuCopyInteractingWorkUnit( amoebaGpuContext amoebaGpu ){

   // ---------------------------------------------------------------------------------------

    gpuContext gpu = amoebaGpu->gpuContext;

    // refresh the host-side copies before editing them

    gpu->psInteractingWorkUnit->Download();
    gpu->psWorkUnit->Download();
    amoebaGpu->psWorkUnit->Download();

    // the log handle is optional (other routines in this file test it before use);
    // passing a NULL stream to fprintf is undefined behavior

    if( amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "gpuCopyInteractingWorkUnit called -- to be removed.\n" );
    }

    // NOTE(review): the loop bound is the length of gpu->psInteractingWorkUnit; this
    // presumes gpu->psWorkUnit and amoebaGpu->psWorkUnit hold at least that many
    // entries -- confirm against the stream allocations

    for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){
        gpu->psInteractingWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
        gpu->psWorkUnit->_pSysStream[0][ii]            = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
    }

    // push the modified lists back to the device

    gpu->psInteractingWorkUnit->Upload();
    gpu->psWorkUnit->Upload();

   // ---------------------------------------------------------------------------------------
}
#undef AMOEBA_DEBUG
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
View file @
a9054686
...
...
@@ -126,7 +126,7 @@ struct cudaAmoebaGmxSimulation {
unsigned
int
numberOfAtoms
;
// number of atoms
unsigned
int
paddedNumberOfAtoms
;
// padded number of atoms
float
cutoffDistance2
;
// cutoff distance squared for PME
//
float cutoffDistance2; // cutoff distance squared for PME
float
sqrtPi
;
// sqrt(PI)
float
scalingDistanceCutoff
;
// scaling cutoff
float2
*
pDampingFactorAndThole
;
// Thole & damping factors
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
a9054686
...
...
@@ -343,6 +343,9 @@ void amoebaGpuSetConstants(amoebaGpuContext gpu);
extern
"C"
void
gpuSetAmoebaBondOffsets
(
amoebaGpuContext
gpu
);
extern
"C"
void
gpuCopyInteractingWorkUnit
(
amoebaGpuContext
gpu
);
/*
extern "C"
void gpuSetDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
View file @
a9054686
...
...
@@ -44,6 +44,11 @@ struct FixedFieldParticle {
float
gkField
[
3
];
#endif
#ifdef INCLUDE_FIXED_FIELD_BUFFERS
float
tempBuffer
[
3
];
float
tempBufferP
[
3
];
#endif
};
__device__
static
void
loadFixedFieldShared
(
struct
FixedFieldParticle
*
sA
,
unsigned
int
atomI
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
a9054686
...
...
@@ -24,6 +24,11 @@ struct MutualInducedParticle {
float
fieldS
[
3
];
float
fieldPolarS
[
3
];
#endif
#ifdef INCLUDE_MI_FIELD_BUFFERS
float
tempBuffer
[
3
];
float
tempBufferP
[
3
];
#endif
};
__device__
static
void
loadMutualInducedShared
(
MutualInducedParticle
*
sA
,
unsigned
int
atomI
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
a9054686
...
...
@@ -775,15 +775,15 @@ void kComputeFixedMultipoleForceAndEnergy_kernel()
multipole
[
8
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
2
];
multipole
[
9
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
5
];
float
*
phi
=
&
cAmoebaSim
.
pPhi
[
20
*
i
];
cAmoebaSim
.
pTorque
[
3
*
i
]
=
-
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phi
[
2
]
-
multipole
[
2
]
*
zscale
*
phi
[
3
]
cAmoebaSim
.
pTorque
[
3
*
i
]
=
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phi
[
2
]
-
multipole
[
2
]
*
zscale
*
phi
[
3
]
+
2.0
f
*
(
multipole
[
6
]
-
multipole
[
5
])
*
zscale
*
zscale
*
phi
[
9
]
+
multipole
[
8
]
*
yscale
*
yscale
*
phi
[
7
]
+
multipole
[
9
]
*
xscale
*
yscale
*
phi
[
5
]
-
multipole
[
7
]
*
yscale
*
zscale
*
phi
[
8
]
-
multipole
[
9
]
*
xscale
*
zscale
*
phi
[
6
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
=
-
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phi
[
3
]
-
multipole
[
3
]
*
xscale
*
phi
[
1
]
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
=
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phi
[
3
]
-
multipole
[
3
]
*
xscale
*
phi
[
1
]
+
2.0
f
*
(
multipole
[
4
]
-
multipole
[
6
])
*
zscale
*
zscale
*
phi
[
8
]
+
multipole
[
7
]
*
zscale
*
zscale
*
phi
[
9
]
+
multipole
[
8
]
*
xscale
*
zscale
*
phi
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
phi
[
4
]
-
multipole
[
9
]
*
yscale
*
yscale
*
phi
[
7
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
=
-
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phi
[
1
]
-
multipole
[
1
]
*
yscale
*
phi
[
2
]
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
=
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phi
[
1
]
-
multipole
[
1
]
*
yscale
*
phi
[
2
]
+
2.0
f
*
(
multipole
[
5
]
-
multipole
[
4
])
*
yscale
*
yscale
*
phi
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phi
[
4
]
+
multipole
[
9
]
*
yscale
*
zscale
*
phi
[
8
]
-
multipole
[
7
]
*
xscale
*
yscale
*
phi
[
5
]
-
multipole
[
8
]
*
zscale
*
zscale
*
phi
[
9
]);
...
...
@@ -810,9 +810,9 @@ void kComputeFixedMultipoleForceAndEnergy_kernel()
f
.
y
*=
cAmoebaSim
.
electric
*
cSim
.
pmeGridSize
.
y
*
cSim
.
invPeriodicBoxSizeY
;
f
.
z
*=
cAmoebaSim
.
electric
*
cSim
.
pmeGridSize
.
z
*
cSim
.
invPeriodicBoxSizeZ
;
float4
force
=
cSim
.
pForce4
[
i
];
force
.
x
+
=
f
.
x
;
force
.
y
+
=
f
.
y
;
force
.
z
+
=
f
.
z
;
force
.
x
-
=
f
.
x
;
force
.
y
-
=
f
.
y
;
force
.
z
-
=
f
.
z
;
cSim
.
pForce4
[
i
]
=
force
;
...
...
@@ -854,15 +854,15 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
multipole
[
8
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
2
];
multipole
[
9
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
5
];
float
*
phidp
=
&
cAmoebaSim
.
pPhidp
[
20
*
i
];
cAmoebaSim
.
pTorque
[
3
*
i
]
=
-
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phidp
[
2
]
-
multipole
[
2
]
*
zscale
*
phidp
[
3
]
cAmoebaSim
.
pTorque
[
3
*
i
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phidp
[
2
]
-
multipole
[
2
]
*
zscale
*
phidp
[
3
]
+
2.0
f
*
(
multipole
[
6
]
-
multipole
[
5
])
*
zscale
*
zscale
*
phidp
[
9
]
+
multipole
[
8
]
*
yscale
*
yscale
*
phidp
[
7
]
+
multipole
[
9
]
*
xscale
*
yscale
*
phidp
[
5
]
-
multipole
[
7
]
*
yscale
*
zscale
*
phidp
[
8
]
-
multipole
[
9
]
*
xscale
*
zscale
*
phidp
[
6
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
=
-
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phidp
[
3
]
-
multipole
[
3
]
*
xscale
*
phidp
[
1
]
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phidp
[
3
]
-
multipole
[
3
]
*
xscale
*
phidp
[
1
]
+
2.0
f
*
(
multipole
[
4
]
-
multipole
[
6
])
*
zscale
*
zscale
*
phidp
[
8
]
+
multipole
[
7
]
*
zscale
*
zscale
*
phidp
[
9
]
+
multipole
[
8
]
*
xscale
*
zscale
*
phidp
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
phidp
[
4
]
-
multipole
[
9
]
*
yscale
*
yscale
*
phidp
[
7
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
=
-
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phidp
[
1
]
-
multipole
[
1
]
*
yscale
*
phidp
[
2
]
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phidp
[
1
]
-
multipole
[
1
]
*
yscale
*
phidp
[
2
]
+
2.0
f
*
(
multipole
[
5
]
-
multipole
[
4
])
*
yscale
*
yscale
*
phidp
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phidp
[
4
]
+
multipole
[
9
]
*
yscale
*
zscale
*
phidp
[
8
]
-
multipole
[
7
]
*
xscale
*
yscale
*
phidp
[
5
]
-
multipole
[
8
]
*
zscale
*
zscale
*
phidp
[
9
]);
...
...
@@ -906,9 +906,9 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
f
.
y
*=
0.5
f
*
cAmoebaSim
.
electric
*
cSim
.
pmeGridSize
.
y
*
cSim
.
invPeriodicBoxSizeY
;
f
.
z
*=
0.5
f
*
cAmoebaSim
.
electric
*
cSim
.
pmeGridSize
.
z
*
cSim
.
invPeriodicBoxSizeZ
;
float4
force
=
cSim
.
pForce4
[
i
];
force
.
x
+
=
f
.
x
;
force
.
y
+
=
f
.
y
;
force
.
z
+
=
f
.
z
;
force
.
x
-
=
f
.
x
;
force
.
y
-
=
f
.
y
;
force
.
z
-
=
f
.
z
;
cSim
.
pForce4
[
i
]
=
force
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
cAmoebaSim
.
electric
*
energy
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
a9054686
...
...
@@ -72,8 +72,20 @@ struct PmeDirectElectrostaticParticle {
float
torque
[
3
];
float
padding
;
float
tempForce
[
3
];
float
tempTorque
[
3
];
};
// Add atomJ's tempForce and tempTorque components into the matching entries of atomI.
__device__ void sumTempBuffer( PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ ){

    for( int component = 0; component < 3; component++ ){
        atomI.tempForce[component] += atomJ.tempForce[component];
    }

    for( int component = 0; component < 3; component++ ){
        atomI.tempTorque[component] += atomJ.tempTorque[component];
    }
}
/*
__device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
...
...
@@ -134,9 +146,9 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
float
uiy
=
0.5
f
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
float
uiz
=
0.5
f
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
]);
atomI
.
torque
[
0
]
-
=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
atomI
.
torque
[
1
]
-
=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
-
=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
atomI
.
torque
[
0
]
+
=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
atomI
.
torque
[
1
]
+
=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
+
=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
...
...
@@ -186,7 +198,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
gfr
[
8
],
gfri
[
7
];
float
gti
[
7
],
gtri
[
7
];
float
conversionFactor
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
...
...
@@ -219,7 +231,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
c
AmoebaSim
.
cutoffDistance2
){
if
(
r2
<=
c
Sim
.
nonbondedCutoffSqr
){
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
...
...
@@ -540,7 +552,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
ei
=
ei
-
erli
;
*
energy
=
conversionFactor
*
(
e
+
ei
);
*
energy
=
-
conversionFactor
*
(
e
+
ei
);
// increment the total intramolecular energy; assumes;
// intramolecular distances are less than half of cell;
...
...
@@ -1161,15 +1173,27 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)
),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
2
);
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticN2Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
),
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
nonbond_threads_per_block
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
]
,
kCalculateAmoebaPmeDirectElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -1180,15 +1204,11 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
// gpu->sim.pInteractingWorkUnit,
// amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -1209,7 +1229,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished PmeDirectElectrostatic kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
1400
;
int
maxPrint
=
5
;
float
conversion
=
1.0
f
/
41.84
f
;
float
forceSum
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
...
...
@@ -1270,7 +1290,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
1
){
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
a9054686
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
a9054686
...
...
@@ -80,7 +80,6 @@ static void kReducePmeEFieldPolar_kernel( unsigned int fieldComponents, unsigned
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
...
...
@@ -96,7 +95,6 @@ static void kReducePmeEField_kernel( unsigned int fieldComponents, unsigned int
// Reduce field
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
//const float term = 0.0f;
while
(
pos
<
fieldComponents
)
{
...
...
@@ -154,7 +152,20 @@ static void kReducePmeDirectE_Fields(amoebaGpuContext amoebaGpu )
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
#undef GK
#undef INCLUDE_FIXED_FIELD_BUFFERS
#define INCLUDE_FIXED_FIELD_BUFFERS
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
#undef INCLUDE_FIXED_FIELD_BUFFERS
// Add atomJ's tempBuffer and tempBufferP components into the matching entries of atomI.
__device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& atomJ ){

    for( int component = 0; component < 3; component++ ){
        atomI.tempBuffer[component] += atomJ.tempBuffer[component];
    }

    for( int component = 0; component < 3; component++ ){
        atomI.tempBufferP[component] += atomJ.tempBufferP[component];
    }
}
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
dscale
,
float
pscale
,
float
fields
[
4
][
3
]
#ifdef AMOEBA_DEBUG
...
...
@@ -175,7 +186,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
...
...
@@ -310,7 +321,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// increment the field at each site due to this interaction
if
(
r2
<=
c
AmoebaSim
.
cutoffDistance2
){
if
(
r2
<=
c
Sim
.
nonbondedCutoffSqr
){
fields
[
0
][
0
]
=
fim
[
0
]
-
fid
[
0
];
fields
[
0
][
1
]
=
fim
[
1
]
-
fid
[
1
];
...
...
@@ -345,6 +356,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
fields
[
2
][
2
]
=
0.0
f
;
fields
[
3
][
2
]
=
0.0
f
;
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
...
...
@@ -399,6 +411,7 @@ static int isNanOrInfinity( double number ){
static
void
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpuContext
amoebaGpu
)
{
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
...
...
@@ -416,40 +429,27 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
0
;
int
maxPrint
=
3002
;
amoebaGpu
->
psE_Field
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Recip EFields In
\n
"
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// E_Field
int
isNan
=
isNanOrInfinity
(
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
]
);
isNan
+=
isNanOrInfinity
(
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
+
1
]
);
isNan
+=
isNanOrInfinity
(
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E[%16.9e %16.9e %16.9e] %s
\n
"
,
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
indexOffset
+
2
],
(
isNan
?
"XXX"
:
""
)
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Recip EFields End
\n
"
);
unsigned
int
targetAtom
=
354
;
#endif
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
}
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondT
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondT
hreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
]
,
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
t
hreadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -459,8 +459,9 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#endif
}
else
{
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
//amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -471,27 +472,16 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
#if 0
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
//float index = 1.0f;
float index = (float) ii;
for( unsigned int jj = 0; jj < 3*amoebaGpu->paddedNumberOfAtoms; jj += 3 ){
unsigned int kk = 3*ii*amoebaGpu->paddedNumberOfAtoms + jj;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk] = index;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+1] = index;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+2] = index;
}
}
amoebaGpu->psWorkArray_3_1->Upload();
#endif
kReducePmeDirectE_Fields
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondT
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
...
@@ -527,6 +517,8 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
*/
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E-field (includes self term)"
);
int
maxPrint
=
3002
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
...
@@ -558,16 +550,29 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
+
paddedNumberOfAtoms
].
x
)
>
0.0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
for
(
int
kk
=
0
;
kk
<
10
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
6
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e ] [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] p
\n
"
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
)
/
5.50
f
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
...
...
@@ -581,13 +586,12 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
delete
debugArray
;
}
#endif
if
(
0
){
if
(
1
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
a9054686
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
a9054686
...
...
@@ -36,7 +36,21 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef INCLUDE_MI_FIELD_BUFFERS
// Accumulate atomJ's temporary buffers into atomI's, component by component:
//   atomI.tempBuffer[k]  += atomJ.tempBuffer[k]
//   atomI.tempBufferP[k] += atomJ.tempBufferP[k]     for k = 0, 1, 2
// atomJ is only read; atomI is updated in place.
__device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedParticle& atomJ ){

    // first the three tempBuffer components ...
    for( int component = 0; component < 3; component++ ){
        atomI.tempBuffer[component] += atomJ.tempBuffer[component];
    }

    // ... then the three tempBufferP components (same order as the unrolled form)
    for( int component = 0; component < 3; component++ ){
        atomI.tempBufferP[component] += atomJ.tempBufferP[component];
    }
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
...
...
@@ -152,7 +166,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// increment the field at each site due to this interaction
if
(
r2
<=
c
AmoebaSim
.
cutoffDistance2
){
if
(
r2
<=
c
Sim
.
nonbondedCutoffSqr
){
fields
[
0
][
0
]
=
fimd
[
0
]
-
fid
[
0
];
fields
[
1
][
0
]
=
fkmd
[
0
]
-
fkd
[
0
];
...
...
@@ -370,6 +384,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
...
...
@@ -389,9 +404,24 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
}
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
//gpu->sim.pInteractingWorkUnit,
//amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeMutualInducedFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -405,14 +435,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondT
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondT
hreadsPerBlock
,
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
t
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeMutualInducedFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
kCalculateAmoebaPmeMutualInducedFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -717,6 +746,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
1
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
}
#endif
iteration
++
;
...
...
@@ -725,7 +765,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
if
(
0
){
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
a9054686
...
...
@@ -131,7 +131,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
c
AmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
z
=
c
Sim
.
nonbondedCutoffSqr
;
debugArray
[
index
].
w
=
6
.
0
f
;
...
...
@@ -209,10 +209,13 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
}
else
{
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
if
(
lasty
!=
y
)
{
unsigned
int
atomJ
=
y
+
tgx
;
...
...
@@ -229,17 +232,18 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float
ijField
[
4
][
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
t
j
],
uscale
,
ijField
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
Idx
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
t
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
Idx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
...
...
@@ -255,26 +259,64 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// add to field at atomJ the field due atomI's dipole
psA
[
tj
].
field
[
0
]
+=
mask
?
ijField
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
field
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
field
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
1
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
tj
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
3
][
0
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
3
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
}
else
{
psA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
1
];
psA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
psA
[
threadIdx
.
x
],
psA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
psA
[
threadIdx
.
x
],
psA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
psA
[
threadIdx
.
x
],
psA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
psA
[
threadIdx
.
x
],
psA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
field
[
0
]
+=
psA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
psA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
psA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
psA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
psA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
psA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
psA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
psA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
psA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
psA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
psA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
psA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
t
j
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
t
j
)
:
atomI
;
if
(
atomI
==
targetAtom
||
(
y
+
j
Idx
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
Idx
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
t
j
);
debugArray
[
index
].
z
=
c
AmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
Idx
);
debugArray
[
index
].
z
=
c
Sim
.
nonbondedCutoffSqr
;
debugArray
[
index
].
w
=
7
.
0
f
;
...
...
@@ -315,31 +357,12 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// end of j-loop
// Write results
...
...
@@ -364,8 +387,10 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
#endif
lasty
=
y
;
}
}
// end of pInteractionFlag block
}
// end of x == y block
pos
++
;
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaMapTorques.cu
View file @
a9054686
...
...
@@ -653,7 +653,7 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks=%d numThreads=%d %d
\n
"
,
methodName
,
numBlocks
,
numThreads
,
amoebaGpu
->
maxMapTorqueDifferencePow2
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psForce
->
Download
();
psCudaForce4
->
Download
();
amoebaGpu
->
torqueMapForce
->
Download
();
amoebaGpu
->
psTorque
->
Download
();
int
maxPrint
=
10
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Post torqueMap
\n
"
);
...
...
@@ -670,6 +670,10 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"fT[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
torqueMapForce
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
torqueMapForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
torqueMapForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"T[%16.9e %16.9e %16.9e]
\n
"
,
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
...
...
@@ -741,7 +745,7 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce2( amoebaGpuContext amoebaGpu,
amoebaGpu
->
maxMapTorqueDifference
,
amoebaGpu
->
torqueMapForce
->
_pDevStream
[
0
],
psCudaForce4
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"amoebaMapTorqueReduce_kernel
2
"
);
LAUNCHERROR
(
"amoebaMapTorqueReduce_kernel
3
"
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
a9054686
...
...
@@ -353,6 +353,13 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
}
#undef USE_PERIODIC
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kFindInteractingBlocks.h"
#undef USE_PERIODIC
#undef METHOD_NAME
void
kCalculateAmoebaMultipoleForces
(
amoebaGpuContext
amoebaGpu
,
bool
hasAmoebaGeneralizedKirkwood
)
{
std
::
string
methodName
=
"kCalculateAmoebaMultipoleForces"
;
...
...
@@ -372,6 +379,37 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaFixedEField
(
amoebaGpu
);
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
}
else
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Cell %8u %8u [%5u %5u %1u] "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
x
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %8u [%5u %5u %1u] %10u
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
}
}
else
{
}
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpu
);
}
...
...
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
View file @
a9054686
...
...
@@ -4535,7 +4535,6 @@ void testUsingAmoebaTinkerParameterFile( const std::string& amoebaTinkerParamete
MapStringDouble
tinkerEnergies
;
MapStringVectorOfVectors
supplementary
;
MapStringIntI
isPresent
=
forceMap
.
find
(
AMOEBA_GK_FORCE
);
bool
gkIsActive
;
if
(
isPresent
!=
forceMap
.
end
()
&&
isPresent
->
second
!=
0
){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment