Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ea8680f
Commit
5ea8680f
authored
Oct 18, 2016
by
peastman
Committed by
GitHub
Oct 18, 2016
Browse files
Merge pull request #1640 from peastman/diis
Solve DIIS matrix on GPU
parents
d1caf109
c0538e1f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
129 additions
and
47 deletions
+129
-47
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+15
-45
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+3
-2
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+111
-0
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
5ea8680f
...
@@ -52,10 +52,10 @@
...
@@ -52,10 +52,10 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
#define CHECK_RESULT(result) \
#define CHECK_RESULT(result
, prefix
) \
if (result != CUDA_SUCCESS) { \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
std::stringstream m; \
m<<
errorMessage
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
m<<
prefix
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
throw OpenMMException(m.str());\
}
}
...
@@ -813,7 +813,7 @@ private:
...
@@ -813,7 +813,7 @@ private:
};
};
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
hasCreatedEvent
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
...
@@ -933,6 +933,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -933,6 +933,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
sort
;
delete
sort
;
if
(
hasInitializedFFT
)
if
(
hasInitializedFFT
)
cufftDestroy
(
fft
);
cufftDestroy
(
fft
);
if
(
hasCreatedEvent
)
cuEventDestroy
(
syncEvent
);
}
}
void
CudaCalcAmoebaMultipoleForceKernel
::
initialize
(
const
System
&
system
,
const
AmoebaMultipoleForce
&
force
)
{
void
CudaCalcAmoebaMultipoleForceKernel
::
initialize
(
const
System
&
system
,
const
AmoebaMultipoleForce
&
force
)
{
...
@@ -1019,6 +1021,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1019,6 +1021,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
CHECK_RESULT
(
cuEventCreate
(
&
syncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for AmoebaMultipoleForce"
);
hasCreatedEvent
=
true
;
}
}
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
...
@@ -1210,6 +1214,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1210,6 +1214,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
solveMatrixKernel
=
cu
.
getKernel
(
module
,
"solveDIISMatrix"
);
initExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"initExtrapolatedDipoles"
);
initExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"initExtrapolatedDipoles"
);
iterateExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"iterateExtrapolatedDipoles"
);
iterateExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"iterateExtrapolatedDipoles"
);
computeExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"computeExtrapolatedDipoles"
);
computeExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"computeExtrapolatedDipoles"
);
...
@@ -1820,6 +1825,7 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1820,6 +1825,7 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
inducedDipoleErrors
->
download
(
errors
,
false
);
inducedDipoleErrors
->
download
(
errors
,
false
);
cuEventRecord
(
syncEvent
,
cu
.
getCurrentStream
());
// Build the DIIS matrix.
// Build the DIIS matrix.
...
@@ -1828,15 +1834,15 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1828,15 +1834,15 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
int
blockSize
=
512
;
int
blockSize
=
512
;
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
blockSize
,
blockSize
,
blockSize
*
elementSize
);
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
blockSize
,
blockSize
,
blockSize
*
elementSize
);
vector
<
float
>
matrixf
;
vector
<
double
>
matrix
;
// Solve the matrix.
if
(
cu
.
getUseDoublePrecision
())
diisMatrix
->
download
(
matrix
);
void
*
solveMatrixArgs
[]
=
{
&
iteration
,
&
diisMatrix
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
()};
else
cu
.
executeKernel
(
solveMatrixKernel
,
solveMatrixArgs
,
32
,
32
);
diisMatrix
->
download
(
matrixf
);
// Determine whether the iteration has converged.
// Determine whether the iteration has converged.
cuEventSynchronize
(
syncEvent
);
double
total1
=
0.0
,
total2
=
0.0
;
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total1
+=
errors
[
j
].
x
;
...
@@ -1844,42 +1850,6 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1844,42 +1850,6 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
}
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
return
true
;
return
true
;
// Compute the coefficients for selecting the new dipoles.
float
*
coefficients
=
(
float
*
)
cu
.
getPinnedBuffer
();
if
(
iteration
==
0
)
coefficients
[
0
]
=
1
;
else
{
int
rank
=
numPrev
+
1
;
Array2D
<
double
>
b
(
rank
,
rank
);
b
[
0
][
0
]
=
0
;
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
b
[
i
][
0
]
=
b
[
0
][
i
]
=
-
1
;
if
(
cu
.
getUseDoublePrecision
())
{
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrix
[
i
*
MaxPrevDIISDipoles
+
j
];
}
else
{
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrixf
[
i
*
MaxPrevDIISDipoles
+
j
];
}
// Solve using LU.
JAMA
::
LU
<
double
>
lu
(
b
);
Array1D
<
double
>
x
(
rank
,
0.0
);
x
[
0
]
=
-
1.0
;
Array1D
<
double
>
coeff
=
lu
.
solve
(
x
);
coefficients
[
rank
-
1
]
=
1.0
;
for
(
int
i
=
0
;
i
<
rank
-
1
;
i
++
)
{
coefficients
[
i
]
=
coeff
[
i
+
1
];
coefficients
[
rank
-
1
]
-=
coefficients
[
i
];
}
}
diisCoefficients
->
upload
(
coefficients
,
false
);
// Compute the dipoles.
// Compute the dipoles.
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
5ea8680f
...
@@ -408,7 +408,7 @@ private:
...
@@ -408,7 +408,7 @@ private:
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
double
alpha
,
inducedEpsilon
;
double
alpha
,
inducedEpsilon
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
,
hasCreatedEvent
;
AmoebaMultipoleForce
::
PolarizationType
polarizationType
;
AmoebaMultipoleForce
::
PolarizationType
polarizationType
;
CudaContext
&
cu
;
CudaContext
&
cu
;
const
System
&
system
;
const
System
&
system
;
...
@@ -471,9 +471,10 @@ private:
...
@@ -471,9 +471,10 @@ private:
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
,
solveMatrixKernel
;
CUfunction
initExtrapolatedKernel
,
iterateExtrapolatedKernel
,
computeExtrapolatedKernel
,
addExtrapolatedGradientKernel
;
CUfunction
initExtrapolatedKernel
,
iterateExtrapolatedKernel
,
computeExtrapolatedKernel
,
addExtrapolatedGradientKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CUevent
syncEvent
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
static
const
int
MaxPrevDIISDipoles
=
20
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
5ea8680f
...
@@ -694,6 +694,117 @@ extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int
...
@@ -694,6 +694,117 @@ extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int
}
}
}
}
/**
 * Solve the DIIS linear system and record the coefficients used to combine previous dipoles.
 *
 * The augmented matrix has size rank x rank, where rank = numPrev+1 and
 * numPrev = min(iteration+1, MAX_PREV_DIIS_DIPOLES).  All threads cooperate to load it into
 * shared memory; the factorization and the two triangular solves are then done by thread 0 only.
 * Indexing uses threadIdx.x alone (no blockIdx), so every block that is launched repeats the same work.
 *
 * @param iteration     the current DIIS iteration.  When it is 0, coefficients[0] is set to 1 and
 *                      nothing else is computed.
 * @param matrix        the DIIS matrix, read with a row stride of MAX_PREV_DIIS_DIPOLES.  Only the
 *                      leading numPrev x numPrev entries are used.
 * @param coefficients  output array.  Receives rank values: the first numPrev are the solved
 *                      coefficients and the last is 1 minus their sum.
 *
 * NOTE(review): there is no guard against a zero pivot in the back substitution or against
 * mean == 0; confirm that callers never rely on the output in those cases.
 */
extern "C" __global__ void solveDIISMatrix(int iteration, const real* __restrict__ matrix, float* __restrict__ coefficients) {
    __shared__ real b[MAX_PREV_DIIS_DIPOLES+1][MAX_PREV_DIIS_DIPOLES+1];
    // Row permutation produced by pivoting.  These are row indices, so store them as integers.
    __shared__ int piv[MAX_PREV_DIIS_DIPOLES+1];
    __shared__ real x[MAX_PREV_DIIS_DIPOLES+1];

    // On the first iteration we don't need to do any calculation.

    if (iteration == 0) {
        if (threadIdx.x == 0)
            coefficients[0] = 1;
        return;
    }

    // Load the matrix into rows/columns 1..numPrev of b.  Column 0 is filled with -1 here;
    // row 0 is filled in below once the scale factor is known.

    int numPrev = min(iteration+1, MAX_PREV_DIIS_DIPOLES);
    int rank = numPrev+1;
    for (int index = threadIdx.x; index < numPrev*numPrev; index += blockDim.x) {
        int i = index/numPrev;
        int j = index-i*numPrev;
        b[i+1][j+1] = matrix[i*MAX_PREV_DIIS_DIPOLES+j];
    }
    for (int i = threadIdx.x; i < rank; i += blockDim.x) {
        b[i][0] = -1;
        piv[i] = i;
    }
    __syncthreads();

    // Compute the mean absolute value of the values we just loaded.  We use that for preconditioning it,
    // which is essential for doing the computation in single precision.

    if (threadIdx.x == 0) {
        real mean = 0;
        for (int i = 0; i < numPrev; i++)
            for (int j = 0; j < numPrev; j++)
                mean += fabs(b[i+1][j+1]);
        mean /= numPrev*numPrev;

        // Row 0 holds -mean rather than -1; the solution is multiplied by mean again when it is recorded.

        b[0][0] = 0;
        for (int i = 1; i < rank; i++)
            b[0][i] = -mean;

        // Compute the LU decomposition of the matrix.  This code is adapted from JAMA.

        for (int j = 0; j < rank; j++) {
            // Apply previous transformations.

            for (int i = 0; i < rank; i++) {
                // Most of the time is spent in the following dot product.

                int kmax = min(i, j);
                real s = 0;
                for (int k = 0; k < kmax; k++)
                    s += b[i][k] * b[k][j];
                b[i][j] -= s;
            }

            // Find pivot and exchange if necessary.

            int p = j;
            for (int i = j+1; i < rank; i++)
                if (fabs(b[i][j]) > fabs(b[p][j]))
                    p = i;
            if (p != j) {
                for (int k = 0; k < rank; k++) {
                    real t = b[p][k];
                    b[p][k] = b[j][k];
                    b[j][k] = t;
                }
                int pivTemp = piv[p];
                piv[p] = piv[j];
                piv[j] = pivTemp;
            }

            // Compute multipliers.

            if (b[j][j] != 0)
                for (int i = j+1; i < rank; i++)
                    b[i][j] /= b[j][j];
        }

        // Solve b*Y = X(piv).  The right hand side is -1 in original row 0 and 0 everywhere else.

        for (int i = 0; i < rank; i++)
            x[i] = (piv[i] == 0 ? -1 : 0);
        for (int k = 0; k < rank; k++)
            for (int i = k+1; i < rank; i++)
                x[i] -= x[k] * b[i][k];

        // Solve U*X = Y;

        for (int k = rank-1; k >= 0; k--) {
            x[k] /= b[k][k];
            for (int i = 0; i < k; i++)
                x[i] -= x[k] * b[i][k];
        }

        // Record the coefficients.  The final entry is whatever is needed to make them all sum to 1.

        real lastCoeff = 1;
        for (int i = 0; i < rank-1; i++) {
            real c = x[i+1]*mean;
            coefficients[i] = c;
            lastCoeff -= c;
        }
        coefficients[rank-1] = lastCoeff;
    }
}
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment