Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
132a94bc
"devtools/ci/gh-actions/vscode:/vscode.git/clone" did not exist on "9fe1bae6efa18a55994522a8aac4f24338a2894e"
Commit
132a94bc
authored
Jan 20, 2011
by
Mark Friedrichs
Browse files
Warp/non-warp calls were reversed in kCalculateAmoebaCudaKirkwood
parent
07f8d5ce
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
66 additions
and
11 deletions
+66
-11
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+2
-1
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+38
-4
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+2
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+2
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+2
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+2
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+16
-0
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
132a94bc
...
...
@@ -793,6 +793,8 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
kCalculateObcGbsaBornSum
(
gpu
->
gpuContext
);
kReduceObcGbsaBornSum
(
gpu
->
gpuContext
);
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psBornRadii, 0.1 );
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psObcChain, 0.0 );
}
// multipoles
...
...
@@ -801,7 +803,6 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
//kClearForces(gpu->gpuContext);
//kClearEnergy(gpu->gpuContext);
//(void) fprintf( data.getLog(), "computeAmoebaMultipoleForce clearing forces/energy after kCalculateAmoebaMultipoleForces()\n" );
// GK
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
132a94bc
...
...
@@ -191,12 +191,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
" gpuContext %p
\n
"
,
amoebaGpu
->
gpuContext
);
(
void
)
fprintf
(
log
,
" log %p
\n
"
,
amoebaGpu
->
log
);
(
void
)
fprintf
(
log
,
" log %p
%s
\n
"
,
amoebaGpu
->
log
,
amoebaGpu
->
log
==
stderr
?
"is stderr"
:
"is not stderr"
);
(
void
)
fprintf
(
log
,
" sm_version %u
\n
"
,
gpu
->
sm_version
);
(
void
)
fprintf
(
log
,
" device %u
\n
"
,
gpu
->
device
);
(
void
)
fprintf
(
log
,
" sharedMemoryPerBlock %u
\n
"
,
gpu
->
sharedMemoryPerBlock
);
(
void
)
fprintf
(
log
,
" pMapArray %p
\n
"
,
amoebaGpu
->
pMapArray
);
(
void
)
fprintf
(
log
,
" dMapArray %p
\n
"
,
amoebaGpu
->
dMapArray
);
(
void
)
fprintf
(
log
,
" bOutputBufferPerWarp %d
\n
"
,
amoebaGpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
log
,
" paddedNumberOfAtoms %u
\n
"
,
amoebaGpu
->
paddedNumberOfAtoms
);
(
void
)
fprintf
(
log
,
" nonbondBlocks %u
\n
"
,
amoebaGpu
->
nonbondBlocks
);
...
...
@@ -209,6 +207,13 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" outputBuffers %u
\n
"
,
amoebaGpu
->
outputBuffers
);
(
void
)
fprintf
(
log
,
" workUnits %u
\n
"
,
amoebaGpu
->
workUnits
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psEnergy
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psForce4
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psPosq4
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
gpuContext
->
psObcData
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psBornForce
,
log
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
" amoebaBonds %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBonds
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_1
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_2
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_3
,
log
);
...
...
@@ -337,6 +342,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" quartic %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradleyQuarticicParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaUreyBradleyID %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaUreyBradleyID
);
(
void
)
fprintf
(
log
,
" pAmoebaUreyBradleyParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaUreyBradleyParameter
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
// if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
// gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
...
...
@@ -394,7 +400,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolar
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolar
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psCurrentEpsilon
,
log
);
(
void
)
fprintf
(
log
,
" numberOfSorWorkVectors %u
\n
"
,
amoebaGpu
->
numberOfSorWorkVectors
);
...
...
@@ -4437,3 +4442,32 @@ void gpuCopyWorkUnit( amoebaGpuContext amoebaGpu ){
}
#undef AMOEBA_DEBUG
/**---------------------------------------------------------------------------------------

   Set every entry of a float stream's host buffer to a constant, then call Upload()

   @param numberOfParticles   number of particles
   @param entriesPerParticle  number of consecutive float entries written for each particle
   @param array               stream whose first host buffer (_pSysStream[0]) is filled
   @param initValue           value assigned to each entry

   --------------------------------------------------------------------------------------- */

void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle,
                               CUDAStream<float>* array, float initValue )
{

    // Entries are laid out particle after particle, so a running offset visits
    // exactly the indices entriesPerParticle*particle + entry, in the same order.

    int offset = 0;
    for( int particle = 0; particle < numberOfParticles; particle++ ){
        for( int entry = 0; entry < entriesPerParticle; entry++ ){
            array->_pSysStream[0][offset] = initValue;
            offset++;
        }
    }

    // Hand the filled host buffer to the stream (presumably a host-to-device
    // copy -- Upload() is defined outside this view).

    array->Upload();
}
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
132a94bc
...
...
@@ -151,6 +151,8 @@ extern void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticl
extern
void
cudaLoadCudaFloat2Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float2
>*
array
,
VectorOfDoubleVectors
&
outputVector
);
extern
void
cudaLoadCudaFloat4Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float4
>*
array
,
VectorOfDoubleVectors
&
outputVector
,
int
*
order
);
extern
void
cudaWriteVectorOfDoubleVectorsToFile
(
char
*
fname
,
std
::
vector
<
int
>&
fileId
,
VectorOfDoubleVectors
&
outputVector
);
// Sets numberOfParticles*entriesPerParticle consecutive entries of the stream's
// first host buffer (array->_pSysStream[0]) to initValue, then calls array->Upload().
// Nothing is written when either count is <= 0. Defined in amoebaCudaGpu.cpp.
extern
void
initializeCudaFloatArray
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float
>*
array
,
float
initValue
);
extern
void
kClearFloat
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float
>*
fieldToClear
);
extern
void
kClearFloat4
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float4
>*
fieldToClear
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
132a94bc
...
...
@@ -1907,7 +1907,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
6
);
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaCudaKirkwoodN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
KirkwoodParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaCudaKirkwoodN2
ByWarp
Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
KirkwoodParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
]
#ifdef AMOEBA_DEBUG
,
debugArray
->
_pDevStream
[
0
],
targetAtom
);
...
...
@@ -1924,7 +1924,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaCudaKirkwoodN2
ByWarp
Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
KirkwoodParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaCudaKirkwoodN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
KirkwoodParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
]
#ifdef AMOEBA_DEBUG
,
debugArray
->
_pDevStream
[
0
],
targetAtom
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
132a94bc
...
...
@@ -1059,6 +1059,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
timestep
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaKirkwoodEDiffN2Forces: blocks=%u threads=%u bffr/Warp=%u atm=%lu shrd=%lu"
" Ebuf=%u ixnCt=%lu workUnits=%u sm=%d device=%d sharedMemoryPerBlock=%u
\n
"
,
...
...
@@ -1068,6 +1069,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
//gpuPrintCudaAmoebaGmxSimulation(amoebaGpu, amoebaGpu->log );
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
132a94bc
...
...
@@ -502,8 +502,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
int
targetAtom
=
0
;
static
const
char
*
methodName
=
"cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply"
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
...
@@ -583,7 +582,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
//printMiFieldBuffer( amoebaGpu, 37 );
//printMiFieldBuffer( amoebaGpu, 38 );
if
(
amoebaGpu
->
log
&&
iteration
==
-
1
){
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
132a94bc
...
...
@@ -1390,6 +1390,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
...
...
@@ -1413,7 +1414,7 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
zeroForce
(
amoebaGpu
);
}
if
(
1
){
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
132a94bc
...
...
@@ -565,7 +565,23 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if
(
hasAmoebaGeneralizedKirkwood
){
cudaComputeAmoebaFixedEAndGkFields
(
amoebaGpu
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
0.0
);
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
0.0
);
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psGk_Field
,
0.0
);
}
cudaComputeAmoebaMutualInducedAndGkField
(
amoebaGpu
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
0.0
);
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
0.0
);
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipoleS
,
0.0
);
initializeCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolarS
,
0.0
);
amoebaGpu
->
mutualInducedDone
=
1
;
}
}
else
{
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
cudaComputeAmoebaFixedEField
(
amoebaGpu
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment