Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1d0193eb
"vscode:/vscode.git/clone" did not exist on "f08c14264ee0963f1c319f528e2b691725ff11c8"
Commit
1d0193eb
authored
Apr 09, 2010
by
Peter Eastman
Browse files
Restructured CCMA code to work on Fermi boards
parent
5301b9cc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
154 additions
and
142 deletions
+154
-142
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+1
-2
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+5
-13
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-3
platforms/cuda/src/kernels/kCCMA.cu
platforms/cuda/src/kernels/kCCMA.cu
+146
-124
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
1d0193eb
...
...
@@ -460,8 +460,7 @@ struct cudaGmxSimulation {
float
*
pCcmaDelta2
;
// Workspace for CCMA
int
*
pCcmaAtomConstraints
;
// The indices of constraints involving each atom
int
*
pCcmaNumAtomConstraints
;
// The number of constraints involving each atom
short
*
pSyncCounter
;
// Used for global thread synchronization
unsigned
int
*
pRequiredIterations
;
// Used by CCMA to communicate whether iteration has converged
int
*
pCcmaConverged
;
// Used by CCMA to communicate whether iteration has converged
float
*
pCcmaReducedMass
;
// The reduced mass for each CCMA constraint
unsigned
int
*
pConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
float
*
pConstraintMatrixValue
;
// The value of each element in the constraint matrix.
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
1d0193eb
...
...
@@ -1533,12 +1533,9 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream
<
float
>*
psCcmaDelta2
=
new
CUDAStream
<
float
>
(
numCCMA
,
1
,
"CcmaDelta2"
);
gpu
->
psCcmaDelta2
=
psCcmaDelta2
;
gpu
->
sim
.
pCcmaDelta2
=
psCcmaDelta2
->
_pDevData
;
CUDAStream
<
short
>*
psSyncCounter
=
new
CUDAStream
<
short
>
(
3
*
gpu
->
sim
.
blocks
,
1
,
"SyncCounter"
);
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psRequiredIterations
=
new
CUDAStream
<
unsigned
int
>
(
1
,
1
,
"RequiredIterations"
);
gpu
->
psRequiredIterations
=
psRequiredIterations
;
gpu
->
sim
.
pRequiredIterations
=
psRequiredIterations
->
_pDevData
;
CUDAStream
<
int
>*
psCcmaConverged
=
new
CUDAStream
<
int
>
(
gpu
->
sim
.
blocks
,
1
,
"CcmaConverged"
);
gpu
->
psCcmaConverged
=
psCcmaConverged
;
gpu
->
sim
.
pCcmaConverged
=
psCcmaConverged
->
_pDevData
;
CUDAStream
<
float
>*
psCcmaReducedMass
=
new
CUDAStream
<
float
>
(
numCCMA
,
1
,
"CcmaReducedMass"
);
gpu
->
psCcmaReducedMass
=
psCcmaReducedMass
;
gpu
->
sim
.
pCcmaReducedMass
=
psCcmaReducedMass
->
_pDevData
;
...
...
@@ -1562,8 +1559,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
}
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
-
1
;
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
(
*
psCcmaNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
...
...
@@ -1576,7 +1571,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psCcmaReducedMass
->
Upload
();
psCcmaAtomConstraints
->
Upload
();
psCcmaNumAtomConstraints
->
Upload
();
psSyncCounter
->
Upload
();
psConstraintMatrixColumn
->
Upload
();
psConstraintMatrixValue
->
Upload
();
gpu
->
sim
.
ccma_threads_per_block
=
(
gpu
->
sim
.
ccmaConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
...
...
@@ -1994,8 +1988,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
psCcmaNumAtomConstraints
=
NULL
;
gpu
->
psCcmaDelta1
=
NULL
;
gpu
->
psCcmaDelta2
=
NULL
;
gpu
->
psSyncCounter
=
NULL
;
gpu
->
psRequiredIterations
=
NULL
;
gpu
->
psCcmaConverged
=
NULL
;
gpu
->
psCcmaReducedMass
=
NULL
;
gpu
->
psConstraintMatrixColumn
=
NULL
;
gpu
->
psConstraintMatrixValue
=
NULL
;
...
...
@@ -2214,8 +2207,7 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psCcmaNumAtomConstraints
;
delete
gpu
->
psCcmaDelta1
;
delete
gpu
->
psCcmaDelta2
;
delete
gpu
->
psSyncCounter
;
delete
gpu
->
psRequiredIterations
;
delete
gpu
->
psCcmaConverged
;
delete
gpu
->
psCcmaReducedMass
;
delete
gpu
->
psConstraintMatrixColumn
;
delete
gpu
->
psConstraintMatrixValue
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
1d0193eb
...
...
@@ -171,9 +171,8 @@ struct _gpuContext {
CUDAStream
<
int
>*
psCcmaNumAtomConstraints
;
// The number of constraints involving each atom
CUDAStream
<
float
>*
psCcmaDelta1
;
// Workspace for CCMA
CUDAStream
<
float
>*
psCcmaDelta2
;
// Workspace for CCMA
CUDAStream
<
short
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
unsigned
int
>*
psRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
CUDAStream
<
float
>*
psCcmaReducedMass
;
// The reduced mass for each SHAKE constraint
CUDAStream
<
int
>*
psCcmaConverged
;
// Used by CCMA to communicate whether iteration has converged
CUDAStream
<
float
>*
psCcmaReducedMass
;
// The reduced mass for each CCMA constraint
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
...
...
platforms/cuda/src/kernels/kCCMA.cu
View file @
1d0193eb
...
...
@@ -48,25 +48,7 @@ void GetCCMASim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
/**
* Synchronize all threads across all blocks.
*/
__device__
void
kSyncAllThreads_kernel
(
short
*
syncCounter
,
short
newCount
)
{
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
syncCounter
[
blockIdx
.
x
]
=
newCount
;
if
(
threadIdx
.
x
<
gridDim
.
x
)
{
volatile
short
*
counter
=
&
syncCounter
[
threadIdx
.
x
];
do
{
}
while
(
*
counter
!=
newCount
);
}
__syncthreads
();
}
__global__
void
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
1024
,
1
)
#elif (__CUDA_ARCH__ >= 130)
...
...
@@ -74,148 +56,188 @@ __launch_bounds__(512, 1)
#else
__launch_bounds__
(
256
,
1
)
#endif
k
ApplyCCMA_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
k
ComputeCCMAConstraintDirections
(
)
{
// Initialize counters used for monitoring convergence and doing global thread synchronization.
__shared__
unsigned
int
requiredIterations
;
if
(
threadIdx
.
x
==
0
)
{
requiredIterations
=
0
;
cSim
.
pSyncCounter
[
gridDim
.
x
+
blockIdx
.
x
]
=
-
1
;
cSim
.
pSyncCounter
[
2
*
gridDim
.
x
+
blockIdx
.
x
]
=
-
1
;
cSim
.
pRequiredIterations
[
0
]
=
0
;
}
// Calculate the direction of each constraint.
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ccmaConstraints
)
for
(
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
index
<
cSim
.
ccmaConstraints
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
atoms
=
cSim
.
pCcmaAtoms
[
pos
];
float4
dir
=
cSim
.
pCcmaDistance
[
pos
];
int2
atoms
=
cSim
.
pCcmaAtoms
[
index
];
float4
dir
=
cSim
.
pCcmaDistance
[
index
];
float4
oldPos1
=
cSim
.
pOldPosq
[
atoms
.
x
];
float4
oldPos2
=
cSim
.
pOldPosq
[
atoms
.
y
];
dir
.
x
=
oldPos1
.
x
-
oldPos2
.
x
;
dir
.
y
=
oldPos1
.
y
-
oldPos2
.
y
;
dir
.
z
=
oldPos1
.
z
-
oldPos2
.
z
;
cSim
.
pCcmaDistance
[
pos
]
=
dir
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
cSim
.
pCcmaDistance
[
index
]
=
dir
;
}
__syncthreads
();
//
Iteratively update the atom positions
.
//
Mark that no blocks have converged yet
.
unsigned
int
maxIterations
=
150
;
for
(
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
index
<
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
cSim
.
pCcmaConverged
[
index
]
=
false
;
}
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
1024
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
512
,
1
)
#else
__launch_bounds__
(
256
,
1
)
#endif
kComputeCCMAConstraintForces
(
float4
*
atomPositions
,
bool
addOldPosition
)
{
if
(
cSim
.
pCcmaConverged
[
blockIdx
.
x
])
return
;
// The constraint iteration has already converged.
extern
__shared__
int
convergedBuffer
[];
float
lowerTol
=
1.0
f
-
2.0
f
*
cSim
.
shakeTolerance
+
cSim
.
shakeTolerance
*
cSim
.
shakeTolerance
;
float
upperTol
=
1.0
f
+
2.0
f
*
cSim
.
shakeTolerance
+
cSim
.
shakeTolerance
*
cSim
.
shakeTolerance
;
for
(
unsigned
int
iteration
=
0
;
iteration
<
maxIterations
;
iteration
++
)
{
// Calculate the constraint force for each constraint.
int
threadConverged
=
1
;
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ccmaConstraints
)
// Calculate the constraint force for each constraint.
for
(
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
index
<
cSim
.
ccmaConstraints
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
atoms
=
cSim
.
pCcmaAtoms
[
index
];
float4
delta1
=
atomPositions
[
atoms
.
x
];
float4
delta2
=
atomPositions
[
atoms
.
y
];
float4
dir
=
cSim
.
pCcmaDistance
[
index
];
float3
rp_ij
=
make_float3
(
delta1
.
x
-
delta2
.
x
,
delta1
.
y
-
delta2
.
y
,
delta1
.
z
-
delta2
.
z
);
if
(
addOldPosition
)
{
int2
atoms
=
cSim
.
pCcmaAtoms
[
pos
];
float4
delta1
=
atomPositions
[
atoms
.
x
];
float4
delta2
=
atomPositions
[
atoms
.
y
];
float4
dir
=
cSim
.
pCcmaDistance
[
pos
];
float3
rp_ij
=
make_float3
(
delta1
.
x
-
delta2
.
x
,
delta1
.
y
-
delta2
.
y
,
delta1
.
z
-
delta2
.
z
);
if
(
addOldPosition
)
{
rp_ij
.
x
+=
dir
.
x
;
rp_ij
.
y
+=
dir
.
y
;
rp_ij
.
z
+=
dir
.
z
;
}
float
rp2
=
rp_ij
.
x
*
rp_ij
.
x
+
rp_ij
.
y
*
rp_ij
.
y
+
rp_ij
.
z
*
rp_ij
.
z
;
float
dist2
=
dir
.
w
*
dir
.
w
;
float
diff
=
dist2
-
rp2
;
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
reducedMass
=
cSim
.
pCcmaReducedMass
[
pos
];
cSim
.
pCcmaDelta1
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
requiredIterations
=
iteration
+
1
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
rp_ij
.
x
+=
dir
.
x
;
rp_ij
.
y
+=
dir
.
y
;
rp_ij
.
z
+=
dir
.
z
;
}
float
rp2
=
rp_ij
.
x
*
rp_ij
.
x
+
rp_ij
.
y
*
rp_ij
.
y
+
rp_ij
.
z
*
rp_ij
.
z
;
float
dist2
=
dir
.
w
*
dir
.
w
;
float
diff
=
dist2
-
rp2
;
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
reducedMass
=
cSim
.
pCcmaReducedMass
[
index
];
cSim
.
pCcmaDelta1
[
index
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
// See whether it has converged.
threadConverged
&=
(
rp2
>=
lowerTol
*
dist2
&&
rp2
<=
upperTol
*
dist2
);
}
// Perform a parallel reduction to see if all constraints handled by this block have converged.
convergedBuffer
[
threadIdx
.
x
]
=
threadConverged
;
__syncthreads
();
for
(
unsigned
int
step
=
1
;
step
<
blockDim
.
x
;
step
*=
2
)
{
if
(
threadIdx
.
x
%
(
2
*
step
)
==
0
)
convergedBuffer
[
threadIdx
.
x
]
&=
convergedBuffer
[
threadIdx
.
x
+
step
];
__syncthreads
();
if
(
threadIdx
.
x
==
0
&&
requiredIterations
>
iteration
)
cSim
.
pRequiredIterations
[
0
]
=
requiredIterations
;
kSyncAllThreads_kernel
(
cSim
.
pSyncCounter
,
iteration
);
if
(
iteration
==
cSim
.
pRequiredIterations
[
0
])
break
;
// All constraints have converged.
}
if
(
threadIdx
.
x
==
0
)
cSim
.
pCcmaConverged
[
blockIdx
.
x
]
=
convergedBuffer
[
0
];
}
// Multiply by the inverse constraint matrix.
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
1024
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
512
,
1
)
#else
__launch_bounds__
(
256
,
1
)
#endif
kMultiplyByCCMAConstraintMatrix
()
{
extern
__shared__
int
convergedBuffer
[];
// First see whether all work groups have converged.
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ccmaConstraints
)
{
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
;
i
++
)
{
unsigned
int
index
=
pos
+
i
*
cSim
.
ccmaConstraints
;
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
index
];
if
(
column
>=
cSim
.
ccmaConstraints
)
break
;
sum
+=
cSim
.
pCcmaDelta1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
index
];
}
cSim
.
pCcmaDelta2
[
pos
]
=
sum
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
convergedBuffer
[
threadIdx
.
x
]
=
true
;
for
(
int
index
=
threadIdx
.
x
;
index
<
gridDim
.
x
;
index
+=
blockDim
.
x
)
convergedBuffer
[
threadIdx
.
x
]
&=
cSim
.
pCcmaConverged
[
index
];
__syncthreads
();
for
(
int
step
=
1
;
step
<
blockDim
.
x
;
step
*=
2
)
{
if
(
threadIdx
.
x
%
(
2
*
step
)
==
0
)
convergedBuffer
[
threadIdx
.
x
]
&=
convergedBuffer
[
threadIdx
.
x
+
step
];
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
cSim
.
pCcmaConverged
[
blockIdx
.
x
]
=
convergedBuffer
[
0
];
if
(
cSim
.
pCcmaConverged
[
0
])
return
;
// The constraint iteration has already converged.
//
Update the position of each atom
.
//
Multiply by the inverse constraint matrix
.
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float
damping
=
(
iteration
<
2
?
0.5
f
:
1.0
f
);
while
(
pos
<
cSim
.
atoms
)
for
(
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
index
<
cSim
.
ccmaConstraints
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
;
i
++
)
{
float4
atomPos
=
atomPositions
[
pos
];
float
invMass
=
cSim
.
pVelm4
[
pos
].
w
;
int
num
=
cSim
.
pCcmaNumAtomConstraints
[
pos
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
constraint
=
cSim
.
pCcmaAtomConstraints
[
index
];
bool
forward
=
(
constraint
>
0
);
constraint
=
(
forward
?
constraint
-
1
:
-
constraint
-
1
);
float
constraintForce
=
damping
*
invMass
*
cSim
.
pCcmaDelta2
[
constraint
];
constraintForce
=
(
forward
?
constraintForce
:
-
constraintForce
);
float4
dir
=
cSim
.
pCcmaDistance
[
constraint
];
atomPos
.
x
+=
constraintForce
*
dir
.
x
;
atomPos
.
y
+=
constraintForce
*
dir
.
y
;
atomPos
.
z
+=
constraintForce
*
dir
.
z
;
}
atomPositions
[
pos
]
=
atomPos
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
element
=
index
+
i
*
cSim
.
ccmaConstraints
;
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
element
];
if
(
column
>=
cSim
.
ccmaConstraints
)
break
;
sum
+=
cSim
.
pCcmaDelta1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
element
];
}
if
(
threadIdx
.
x
==
0
)
requiredIterations
=
iteration
+
1
;
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
2
*
gridDim
.
x
],
iteration
);
cSim
.
pCcmaDelta2
[
index
]
=
sum
;
}
}
// Reset the initial sync counter to be ready for the next call.
__global__
void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
1024
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
512
,
1
)
#else
__launch_bounds__
(
256
,
1
)
#endif
kUpdateCCMAAtomPositions
(
float4
*
atomPositions
,
int
iteration
)
{
if
(
cSim
.
pCcmaConverged
[
blockIdx
.
x
])
return
;
// The constraint iteration has already converged.
float
damping
=
(
iteration
<
2
?
0.5
f
:
1.0
f
);
for
(
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
index
<
cSim
.
atoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
atomPos
=
atomPositions
[
index
];
float
invMass
=
cSim
.
pVelm4
[
index
].
w
;
int
num
=
cSim
.
pCcmaNumAtomConstraints
[
index
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
constraint
=
cSim
.
pCcmaAtomConstraints
[
index
+
i
*
cSim
.
atoms
];
bool
forward
=
(
constraint
>
0
);
constraint
=
(
forward
?
constraint
-
1
:
-
constraint
-
1
);
float
constraintForce
=
damping
*
invMass
*
cSim
.
pCcmaDelta2
[
constraint
];
constraintForce
=
(
forward
?
constraintForce
:
-
constraintForce
);
float4
dir
=
cSim
.
pCcmaDistance
[
constraint
];
atomPos
.
x
+=
constraintForce
*
dir
.
x
;
atomPos
.
y
+=
constraintForce
*
dir
.
y
;
atomPos
.
z
+=
constraintForce
*
dir
.
z
;
}
atomPositions
[
index
]
=
atomPos
;
}
}
if
(
threadIdx
.
x
==
0
)
cSim
.
pSyncCounter
[
blockIdx
.
x
]
=
-
1
;
void
kApplyCCMA
(
gpuContext
gpu
,
float4
*
posq
,
bool
addOldPosition
)
{
kComputeCCMAConstraintDirections
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeCCMAConstraintDirections"
);
for
(
int
i
=
0
;
i
<
150
;
i
++
)
{
kComputeCCMAConstraintForces
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
(
posq
,
addOldPosition
);
kMultiplyByCCMAConstraintMatrix
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
();
gpu
->
psCcmaConverged
->
Download
();
if
((
*
gpu
->
psCcmaConverged
)[
0
])
break
;
kUpdateCCMAAtomPositions
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
(
posq
,
i
);
}
}
void
kApplyFirstCCMA
(
gpuContext
gpu
)
{
// printf("kApplyFirstCCMA\n");
if
(
gpu
->
sim
.
ccmaConstraints
>
0
)
{
kApplyCCMA_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
LAUNCHERROR
(
"kApplyCCMA"
);
}
kApplyCCMA
(
gpu
,
gpu
->
sim
.
pPosqP
,
true
);
}
void
kApplySecondCCMA
(
gpuContext
gpu
)
{
// printf("kApplySecondCCMA\n");
if
(
gpu
->
sim
.
ccmaConstraints
>
0
)
{
kApplyCCMA_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
ccma_threads_per_block
>>>
(
gpu
->
sim
.
pPosq
,
false
);
LAUNCHERROR
(
"kApplyCCMA"
);
}
kApplyCCMA
(
gpu
,
gpu
->
sim
.
pPosq
,
false
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment