Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5b8cade7
Commit
5b8cade7
authored
Jun 04, 2009
by
Peter Eastman
Browse files
Completed CUDA implementation of new constraint algorithm
parent
47a6fb1e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
70 additions
and
80 deletions
+70
-80
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+17
-17
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+47
-28
platforms/cuda/src/kernels/kCShake.cu
platforms/cuda/src/kernels/kCShake.cu
+6
-35
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
5b8cade7
...
@@ -356,9 +356,25 @@ void CudaCalcGBSAOBCForceKernel::executeForces(OpenMMContextImpl& context) {
...
@@ -356,9 +356,25 @@ void CudaCalcGBSAOBCForceKernel::executeForces(OpenMMContextImpl& context) {
static
void
initializeIntegration
(
const
System
&
system
,
CudaPlatform
::
PlatformData
&
data
,
const
Integrator
&
integrator
)
{
static
void
initializeIntegration
(
const
System
&
system
,
CudaPlatform
::
PlatformData
&
data
,
const
Integrator
&
integrator
)
{
//
Set masses
.
//
Initialize any terms that haven't already been handled by a Force
.
_gpuContext
*
gpu
=
data
.
gpu
;
_gpuContext
*
gpu
=
data
.
gpu
;
if
(
!
data
.
hasBonds
)
gpuSetBondParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasAngles
)
gpuSetBondAngleParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasPeriodicTorsions
)
gpuSetDihedralParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
int
>
());
if
(
!
data
.
hasRB
)
gpuSetRbDihedralParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasNonbonded
)
{
gpuSetCoulombParameters
(
gpu
,
138.935485
f
,
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
char
>
(),
vector
<
vector
<
int
>
>
(),
NO_CUTOFF
);
gpuSetLJ14Parameters
(
gpu
,
138.935485
f
,
1.0
f
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
());
}
// Set masses.
int
numParticles
=
system
.
getNumParticles
();
int
numParticles
=
system
.
getNumParticles
();
vector
<
float
>
mass
(
numParticles
);
vector
<
float
>
mass
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
...
@@ -385,22 +401,6 @@ static void initializeIntegration(const System& system, CudaPlatform::PlatformDa
...
@@ -385,22 +401,6 @@ static void initializeIntegration(const System& system, CudaPlatform::PlatformDa
}
}
gpuSetConstraintParameters
(
gpu
,
particle1
,
particle2
,
distance
,
invMass1
,
invMass2
,
(
float
)
integrator
.
getConstraintTolerance
(),
4
);
gpuSetConstraintParameters
(
gpu
,
particle1
,
particle2
,
distance
,
invMass1
,
invMass2
,
(
float
)
integrator
.
getConstraintTolerance
(),
4
);
// Initialize any terms that haven't already been handled by a Force.
if
(
!
data
.
hasBonds
)
gpuSetBondParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasAngles
)
gpuSetBondAngleParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasPeriodicTorsions
)
gpuSetDihedralParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
int
>
());
if
(
!
data
.
hasRB
)
gpuSetRbDihedralParameters
(
gpu
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
());
if
(
!
data
.
hasNonbonded
)
{
gpuSetCoulombParameters
(
gpu
,
138.935485
f
,
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
char
>
(),
vector
<
vector
<
int
>
>
(),
NO_CUTOFF
);
gpuSetLJ14Parameters
(
gpu
,
138.935485
f
,
1.0
f
,
vector
<
int
>
(),
vector
<
int
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
(),
vector
<
float
>
());
}
// Finish initialization.
// Finish initialization.
gpuBuildThreadBlockWorkList
(
gpu
);
gpuBuildThreadBlockWorkList
(
gpu
);
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
5b8cade7
...
@@ -85,6 +85,18 @@ struct Constraint
...
@@ -85,6 +85,18 @@ struct Constraint
float
distance2
;
float
distance2
;
};
};
struct
ConstraintOrderer
:
public
binary_function
<
int
,
int
,
bool
>
{
const
vector
<
int
>&
atom1
;
const
vector
<
int
>&
atom2
;
ConstraintOrderer
(
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
)
:
atom1
(
atom1
),
atom2
(
atom2
)
{
}
bool
operator
()(
int
x
,
int
y
)
{
if
(
atom1
[
x
]
!=
atom1
[
y
])
return
atom1
[
x
]
<
atom1
[
y
];
return
atom2
[
x
]
<
atom2
[
y
];
}
};
struct
Molecule
{
struct
Molecule
{
vector
<
int
>
atoms
;
vector
<
int
>
atoms
;
vector
<
int
>
bonds
;
vector
<
int
>
bonds
;
...
@@ -932,10 +944,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -932,10 +944,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Compute the constraint coupling matrix
// Compute the constraint coupling matrix
vector
<
vector
<
int
>
>
atomAngles
(
gpu
->
natoms
);
vector
<
vector
<
int
>
>
atomAngles
(
gpu
->
natoms
);
...
@@ -950,34 +958,36 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -950,34 +958,36 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
continue
;
continue
;
}
}
double
scale
;
double
scale
;
int
atomj0
=
atom1
[
j
];
int
cj
=
lincsConstraints
[
j
];
int
atomj1
=
atom2
[
j
];
int
ck
=
lincsConstraints
[
k
];
int
atomk0
=
atom1
[
k
];
int
atomj0
=
atom1
[
cj
];
int
atomk1
=
atom2
[
k
];
int
atomj1
=
atom2
[
cj
];
int
atomk0
=
atom1
[
ck
];
int
atomk1
=
atom2
[
ck
];
int
atoma
,
atomb
,
atomc
;
int
atoma
,
atomb
,
atomc
;
if
(
atomj0
==
atomk0
)
{
if
(
atomj0
==
atomk0
)
{
atoma
=
atomj1
;
atoma
=
atomj1
;
atomb
=
atomj0
;
atomb
=
atomj0
;
atomc
=
atomk1
;
atomc
=
atomk1
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
scale
=
invMass1
[
c
j
]
/
(
invMass1
[
c
j
]
+
invMass2
[
c
j
]);
}
}
else
if
(
atomj1
==
atomk1
)
{
else
if
(
atomj1
==
atomk1
)
{
atoma
=
atomj0
;
atoma
=
atomj0
;
atomb
=
atomj1
;
atomb
=
atomj1
;
atomc
=
atomk0
;
atomc
=
atomk0
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
scale
=
invMass2
[
c
j
]
/
(
invMass1
[
c
j
]
+
invMass2
[
c
j
]);
}
}
else
if
(
atomj0
==
atomk1
)
{
else
if
(
atomj0
==
atomk1
)
{
atoma
=
atomj1
;
atoma
=
atomj1
;
atomb
=
atomj0
;
atomb
=
atomj0
;
atomc
=
atomk0
;
atomc
=
atomk0
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
scale
=
invMass1
[
c
j
]
/
(
invMass1
[
c
j
]
+
invMass2
[
c
j
]);
}
}
else
if
(
atomj1
==
atomk0
)
{
else
if
(
atomj1
==
atomk0
)
{
atoma
=
atomj0
;
atoma
=
atomj0
;
atomb
=
atomj1
;
atomb
=
atomj1
;
atomc
=
atomk1
;
atomc
=
atomk1
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
scale
=
invMass2
[
c
j
]
/
(
invMass1
[
c
j
]
+
invMass2
[
c
j
]);
}
}
else
else
continue
;
// These constraints are not connected.
continue
;
// These constraints are not connected.
...
@@ -987,8 +997,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -987,8 +997,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
bool
foundConstraint
=
false
;
bool
foundConstraint
=
false
;
for
(
int
other
=
0
;
other
<
numLincs
;
other
++
)
{
for
(
int
other
=
0
;
other
<
numLincs
;
other
++
)
{
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
double
d1
=
distance
[
j
];
double
d1
=
distance
[
c
j
];
double
d2
=
distance
[
k
];
double
d2
=
distance
[
c
k
];
double
d3
=
distance
[
other
];
double
d3
=
distance
[
other
];
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
foundConstraint
=
true
;
foundConstraint
=
true
;
...
@@ -1040,8 +1050,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1040,8 +1050,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
result
=
QUERN_multiply_with_q_transpose
(
numLincs
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_multiply_with_q_transpose
(
numLincs
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numLincs
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numLincs
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
i
]
/
distance
[
j
];
double
value
=
rhs
[
j
]
*
distance
[
lincsConstraints
[
i
]]
/
distance
[
lincsConstraints
[
j
]
];
if
(
abs
(
value
)
>
0.
02
)
if
(
abs
(
value
)
>
0.
1
)
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
}
}
}
}
...
@@ -1053,10 +1063,18 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1053,10 +1063,18 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
maxRowElements
++
;
// Sort the constraints.
vector
<
int
>
constraintOrder
(
numLincs
);
for
(
int
i
=
0
;
i
<
numLincs
;
++
i
)
constraintOrder
[
i
]
=
i
;
sort
(
constraintOrder
.
begin
(),
constraintOrder
.
end
(),
ConstraintOrderer
(
atom1
,
atom2
));
vector
<
int
>
inverseOrder
(
numLincs
);
for
(
int
i
=
0
;
i
<
numLincs
;
++
i
)
inverseOrder
[
constraintOrder
[
i
]]
=
i
;
for
(
int
i
=
0
;
i
<
matrix
.
size
();
++
i
)
for
(
int
j
=
0
;
j
<
matrix
[
i
].
size
();
++
j
)
matrix
[
i
][
j
].
first
=
inverseOrder
[
matrix
[
i
][
j
].
first
];
// Fill in the CUDA streams.
// Fill in the CUDA streams.
...
@@ -1110,20 +1128,21 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1110,20 +1128,21 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
gpu
->
sim
.
pConstraintMatrixValue
=
psConstraintMatrixValue
->
_pDevData
;
gpu
->
sim
.
pConstraintMatrixValue
=
psConstraintMatrixValue
->
_pDevData
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
int
c
=
lincsConstraints
[
i
];
int
index
=
constraintOrder
[
i
];
int
c
=
lincsConstraints
[
index
];
(
*
psLincsAtoms
)[
i
].
x
=
atom1
[
c
];
(
*
psLincsAtoms
)[
i
].
x
=
atom1
[
c
];
(
*
psLincsAtoms
)[
i
].
y
=
atom2
[
c
];
(
*
psLincsAtoms
)[
i
].
y
=
atom2
[
c
];
(
*
psLincsDistance
)[
i
].
w
=
distance
[
c
];
(
*
psLincsDistance
)[
i
].
w
=
distance
[
c
];
(
*
psLincsS
)[
i
]
=
1.0
f
/
sqrt
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psLincsS
)[
i
]
=
1.0
f
/
sqrt
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psShakeReducedMass
)[
i
]
=
0.5
f
/
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psShakeReducedMass
)[
i
]
=
0.5
f
/
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
].
size
();
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
ndex
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
ndex
].
size
();
j
++
)
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
ndex
][
j
];
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
i
].
size
();
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
i
ndex
].
size
();
j
++
)
{
(
*
psConstraintMatrixColumn
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
first
;
(
*
psConstraintMatrixColumn
)[
i
+
j
*
numLincs
]
=
matrix
[
i
ndex
][
j
].
first
;
(
*
psConstraintMatrixValue
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
second
;
(
*
psConstraintMatrixValue
)[
i
+
j
*
numLincs
]
=
matrix
[
i
ndex
][
j
].
second
;
}
}
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
i
].
size
()
*
numLincs
]
=
numLincs
;
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
i
ndex
].
size
()
*
numLincs
]
=
numLincs
;
}
}
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
-
1
;
(
*
psSyncCounter
)[
i
]
=
-
1
;
...
@@ -1131,7 +1150,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1131,7 +1150,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(
*
psLincsNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
(
*
psLincsNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
bool
forward
=
(
atom1
[
lincsConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
bool
forward
=
(
atom1
[
lincsConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
(
*
psLincsAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
(
forward
?
atomConstraints
[
i
][
j
]
+
1
:
-
atomConstraints
[
i
][
j
]
-
1
);
(
*
psLincsAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]
]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]
]
-
1
);
}
}
}
}
psLincsAtoms
->
Upload
();
psLincsAtoms
->
Upload
();
...
...
platforms/cuda/src/kernels/kCShake.cu
View file @
5b8cade7
...
@@ -68,8 +68,6 @@ __device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
...
@@ -68,8 +68,6 @@ __device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
__global__
void
kApplyCShake_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
__global__
void
kApplyCShake_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
{
{
extern
__shared__
float
temp
[];
// Initialize counters used for monitoring convergence and doing global thread synchronization.
// Initialize counters used for monitoring convergence and doing global thread synchronization.
__shared__
unsigned
int
requiredIterations
;
__shared__
unsigned
int
requiredIterations
;
...
@@ -136,43 +134,16 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -136,43 +134,16 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
if
(
threadIdx
.
x
==
0
&&
requiredIterations
>
iteration
)
if
(
threadIdx
.
x
==
0
&&
requiredIterations
>
iteration
)
cSim
.
pRequiredIterations
[
0
]
=
requiredIterations
;
cSim
.
pRequiredIterations
[
0
]
=
requiredIterations
;
// Multiply by the inverse constraint matrix for each rigid cluster.
// Multiply by the inverse constraint matrix.
// if (cSim.rigidClusters > 0)
// {
// pos = threadIdx.x + blockIdx.x * blockDim.x;
// unsigned int block = pos/cSim.clusterShakeBlockSize;
// unsigned int indexInBlock = pos-block*cSim.clusterShakeBlockSize;
// while (block < cSim.rigidClusters)
// {
// unsigned int firstConstraint = cSim.pRigidClusterConstraintIndex[block];
// unsigned int blockSize = cSim.pRigidClusterConstraintIndex[block+1]-firstConstraint;
// if (indexInBlock < blockSize)
// {
// // Load the constraint forces and matrix.
//
// temp[threadIdx.x] = cSim.pLincsSolution[firstConstraint+indexInBlock];
// unsigned int firstMatrixIndex = cSim.pRigidClusterMatrixIndex[block];
//
// // Multiply by the matrix.
//
// float sum = 0.0f;
// for (unsigned int i = 0; i < blockSize; i++)
// sum += temp[threadIdx.x-indexInBlock+i]*cSim.pRigidClusterMatrix[firstMatrixIndex+i*blockSize+indexInBlock];
// cSim.pLincsSolution[firstConstraint+indexInBlock] = sum;
// }
// block += (blockDim.x*gridDim.x)/cSim.clusterShakeBlockSize;
// }
// kSyncAllThreads_kernel(&cSim.pSyncCounter[gridDim.x], iteration);
// }
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
while
(
pos
<
cSim
.
lincsConstraints
)
{
{
float
sum
=
0.0
f
;
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
;
i
++
)
for
(
unsigned
int
i
=
0
;
;
i
++
)
{
{
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
unsigned
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
pos
+
i
*
cSim
.
lincsConstraints
];
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
index
];
if
(
column
>=
cSim
.
lincsConstraints
)
if
(
column
>=
cSim
.
lincsConstraints
)
break
;
break
;
sum
+=
cSim
.
pLincsRhs1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
index
];
sum
+=
cSim
.
pLincsRhs1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
index
];
...
@@ -222,7 +193,7 @@ void kApplyFirstCShake(gpuContext gpu)
...
@@ -222,7 +193,7 @@ void kApplyFirstCShake(gpuContext gpu)
// printf("kApplyFirstCShake\n");
// printf("kApplyFirstCShake\n");
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
{
{
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
LAUNCHERROR
(
"kApplyCShake"
);
LAUNCHERROR
(
"kApplyCShake"
);
}
}
}
}
...
@@ -232,7 +203,7 @@ void kApplySecondCShake(gpuContext gpu)
...
@@ -232,7 +203,7 @@ void kApplySecondCShake(gpuContext gpu)
// printf("kApplySecondCShake\n");
// printf("kApplySecondCShake\n");
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
{
{
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosq
,
false
);
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosq
,
false
);
LAUNCHERROR
(
"kApplyCShake"
);
LAUNCHERROR
(
"kApplyCShake"
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment