Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3749e84b
"...src/SimTKReference/ReferenceVelocityVerletDynamics.cpp" did not exist on "9111cca092a59ae505625da07abf5c7d41a80925"
Commit
3749e84b
authored
Jun 24, 2009
by
Peter Eastman
Browse files
Various cleanups to integration code
parent
1ef6e1d8
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
946 additions
and
220 deletions
+946
-220
openmmapi/include/openmm/VerletIntegrator.h
openmmapi/include/openmm/VerletIntegrator.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+2
-2
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+6
-4
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+2
-1
platforms/cuda/src/kernels/kBrownianUpdate.cu
platforms/cuda/src/kernels/kBrownianUpdate.cu
+1
-14
platforms/cuda/src/kernels/kLangevinUpdate.cu
platforms/cuda/src/kernels/kLangevinUpdate.cu
+100
-0
platforms/cuda/src/kernels/kLangevinUpdate.h
platforms/cuda/src/kernels/kLangevinUpdate.h
+224
-0
platforms/cuda/src/kernels/kSettle.cu
platforms/cuda/src/kernels/kSettle.cu
+0
-2
platforms/cuda/src/kernels/kShakeH.cu
platforms/cuda/src/kernels/kShakeH.cu
+432
-0
platforms/cuda/src/kernels/kVerletUpdate.cu
platforms/cuda/src/kernels/kVerletUpdate.cu
+4
-196
platforms/cuda/src/kernels/kVerletUpdate.h
platforms/cuda/src/kernels/kVerletUpdate.h
+174
-0
No files found.
openmmapi/include/openmm/VerletIntegrator.h
View file @
3749e84b
...
@@ -39,7 +39,7 @@
...
@@ -39,7 +39,7 @@
namespace
OpenMM
{
namespace
OpenMM
{
/**
/**
* This is an Integrator which simulates a System using the
velocity
Verlet algorithm.
* This is an Integrator which simulates a System using the
leap-frog
Verlet algorithm.
*/
*/
class
OPENMM_EXPORT
VerletIntegrator
:
public
Integrator
{
class
OPENMM_EXPORT
VerletIntegrator
:
public
Integrator
{
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
3749e84b
...
@@ -488,7 +488,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
...
@@ -488,7 +488,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
prevFriction
=
friction
;
prevFriction
=
friction
;
prevStepSize
=
stepSize
;
prevStepSize
=
stepSize
;
}
}
kUpdatePart1
(
gpu
);
k
Langevin
UpdatePart1
(
gpu
);
kApplyFirstShake
(
gpu
);
kApplyFirstShake
(
gpu
);
kApplyFirstSettle
(
gpu
);
kApplyFirstSettle
(
gpu
);
kApplyFirstCCMA
(
gpu
);
kApplyFirstCCMA
(
gpu
);
...
@@ -497,7 +497,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
...
@@ -497,7 +497,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
if
(
step
%
data
.
cmMotionFrequency
==
0
)
if
(
step
%
data
.
cmMotionFrequency
==
0
)
gpu
->
bCalculateCM
=
true
;
gpu
->
bCalculateCM
=
true
;
}
}
kUpdatePart2
(
gpu
);
k
Langevin
UpdatePart2
(
gpu
);
kApplySecondShake
(
gpu
);
kApplySecondShake
(
gpu
);
kApplySecondSettle
(
gpu
);
kApplySecondSettle
(
gpu
);
kApplySecondCCMA
(
gpu
);
kApplySecondCCMA
(
gpu
);
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
3749e84b
...
@@ -40,14 +40,14 @@ extern void kCalculateObcGbsaForces2(gpuContext gpu);
...
@@ -40,14 +40,14 @@ extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateAndersenThermostat
(
gpuContext
gpu
);
extern
void
kCalculateAndersenThermostat
(
gpuContext
gpu
);
extern
void
kReduceBornSumAndForces
(
gpuContext
gpu
);
extern
void
kReduceBornSumAndForces
(
gpuContext
gpu
);
extern
void
kUpdatePart1
(
gpuContext
gpu
);
extern
void
kApplyFirstShake
(
gpuContext
gpu
);
extern
void
kApplyFirstShake
(
gpuContext
gpu
);
extern
void
kApplyFirstCCMA
(
gpuContext
gpu
);
extern
void
kApplyFirstCCMA
(
gpuContext
gpu
);
extern
void
kApplyFirstSettle
(
gpuContext
gpu
);
extern
void
kApplyFirstSettle
(
gpuContext
gpu
);
extern
void
kUpdatePart2
(
gpuContext
gpu
);
extern
void
kApplySecondShake
(
gpuContext
gpu
);
extern
void
kApplySecondShake
(
gpuContext
gpu
);
extern
void
kApplySecondCCMA
(
gpuContext
gpu
);
extern
void
kApplySecondCCMA
(
gpuContext
gpu
);
extern
void
kApplySecondSettle
(
gpuContext
gpu
);
extern
void
kApplySecondSettle
(
gpuContext
gpu
);
extern
void
kLangevinUpdatePart1
(
gpuContext
gpu
);
extern
void
kLangevinUpdatePart2
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart1
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart1
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart2
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart2
(
gpuContext
gpu
);
extern
void
kBrownianUpdatePart1
(
gpuContext
gpu
);
extern
void
kBrownianUpdatePart1
(
gpuContext
gpu
);
...
@@ -72,8 +72,10 @@ extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
...
@@ -72,8 +72,10 @@ extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
extern
void
GetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
GetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
SetForcesSim
(
gpuContext
gpu
);
extern
void
SetForcesSim
(
gpuContext
gpu
);
extern
void
GetForcesSim
(
gpuContext
gpu
);
extern
void
GetForcesSim
(
gpuContext
gpu
);
extern
void
SetUpdateShakeHSim
(
gpuContext
gpu
);
extern
void
SetShakeHSim
(
gpuContext
gpu
);
extern
void
GetUpdateShakeHSim
(
gpuContext
gpu
);
extern
void
GetShakeHSim
(
gpuContext
gpu
);
extern
void
SetLangevinUpdateSim
(
gpuContext
gpu
);
extern
void
GetLangevinUpdateSim
(
gpuContext
gpu
);
extern
void
SetSettleSim
(
gpuContext
gpu
);
extern
void
SetSettleSim
(
gpuContext
gpu
);
extern
void
GetSettleSim
(
gpuContext
gpu
);
extern
void
GetSettleSim
(
gpuContext
gpu
);
extern
void
SetCCMASim
(
gpuContext
gpu
);
extern
void
SetCCMASim
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
3749e84b
...
@@ -1757,7 +1757,8 @@ int gpuSetConstants(gpuContext gpu)
...
@@ -1757,7 +1757,8 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetForcesSim
(
gpu
);
SetForcesSim
(
gpu
);
SetUpdateShakeHSim
(
gpu
);
SetShakeHSim
(
gpu
);
SetLangevinUpdateSim
(
gpu
);
SetVerletUpdateSim
(
gpu
);
SetVerletUpdateSim
(
gpu
);
SetBrownianUpdateSim
(
gpu
);
SetBrownianUpdateSim
(
gpu
);
SetSettleSim
(
gpu
);
SetSettleSim
(
gpu
);
...
...
platforms/cuda/src/kernels/kBrownianUpdate.cu
View file @
3749e84b
...
@@ -35,8 +35,6 @@ using namespace std;
...
@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#include "gputypes.h"
#define DeltaShake
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
void
SetBrownianUpdateSim
(
gpuContext
gpu
)
void
SetBrownianUpdateSim
(
gpuContext
gpu
)
...
@@ -66,15 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
...
@@ -66,15 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
float4
force
=
cSim
.
pForce4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
cSim
.
pOldPosq
[
pos
]
=
apos
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
#ifndef DeltaShake
apos
.
x
+=
force
.
x
*
cSim
.
GDT
+
random4a
.
x
;
apos
.
y
+=
force
.
y
*
cSim
.
GDT
+
random4a
.
y
;
apos
.
z
+=
force
.
z
*
cSim
.
GDT
+
random4a
.
z
;
#else
apos
.
x
=
force
.
x
*
cSim
.
GDT
+
random4a
.
x
;
apos
.
x
=
force
.
x
*
cSim
.
GDT
+
random4a
.
x
;
apos
.
y
=
force
.
y
*
cSim
.
GDT
+
random4a
.
y
;
apos
.
y
=
force
.
y
*
cSim
.
GDT
+
random4a
.
y
;
apos
.
z
=
force
.
z
*
cSim
.
GDT
+
random4a
.
z
;
apos
.
z
=
force
.
z
*
cSim
.
GDT
+
random4a
.
z
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pPosqP
[
pos
]
=
apos
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
...
@@ -99,11 +91,6 @@ __global__ void kBrownianUpdatePart2_kernel()
...
@@ -99,11 +91,6 @@ __global__ void kBrownianUpdatePart2_kernel()
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
...
@@ -111,7 +98,7 @@ __global__ void kBrownianUpdatePart2_kernel()
...
@@ -111,7 +98,7 @@ __global__ void kBrownianUpdatePart2_kernel()
xPrime
.
x
+=
apos
.
x
;
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
xPrime
.
z
+=
apos
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
...
...
platforms/cuda/src/kernels/kLangevinUpdate.cu
0 → 100755
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
using
namespace
std
;
#include "gputypes.h"
static
__constant__
cudaGmxSimulation
cSim
;
void
SetLangevinUpdateSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetLangevinUpdateSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
// Include versions of the kernels with and with center of mass motion removal.
#include "kLangevinUpdate.h"
#define REMOVE_CM
#include "kLangevinUpdate.h"
void
kLangevinUpdatePart1
(
gpuContext
gpu
)
{
// printf("kLangevinUpdatePart1\n");
if
(
gpu
->
bRemoveCM
)
{
kLangevinUpdatePart1CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart1CM"
);
gpu
->
bRemoveCM
=
false
;
}
else
{
kLangevinUpdatePart1_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart1"
);
}
}
extern
void
kGenerateRandoms
(
gpuContext
gpu
);
void
kLangevinUpdatePart2
(
gpuContext
gpu
)
{
// printf("kLangevinUpdatePart2\n");
if
(
gpu
->
bCalculateCM
)
{
kLangevinUpdatePart2CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart2CM"
);
gpu
->
bCalculateCM
=
false
;
gpu
->
bRemoveCM
=
true
;
}
else
{
kLangevinUpdatePart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart2"
);
}
// Update randoms if necessary
gpu
->
iterations
++
;
if
(
gpu
->
iterations
==
gpu
->
sim
.
randomIterations
)
{
kGenerateRandoms
(
gpu
);
gpu
->
iterations
=
0
;
}
}
platforms/cuda/src/kernels/kLangevinUpdate.h
0 → 100644
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Langevin integration. It is included
* several times in kLangevinUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart1CM_kernel
()
#else
__global__
void
kLangevinUpdatePart1_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
float4
CM1
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float3
Vmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
float4
vVector
;
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
vVector
.
w
=
0
.
0
f
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
vVector
.
x
-
cSim
.
EM
*
Vmh
.
x
;
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
vVector
.
y
-
cSim
.
EM
*
Vmh
.
y
;
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
vVector
.
z
-
cSim
.
EM
*
Vmh
.
z
;
#ifdef REMOVE_CM
velocity
.
x
-=
sCM
[
0
].
x
;
velocity
.
y
-=
sCM
[
0
].
y
;
velocity
.
z
-=
sCM
[
0
].
z
;
#endif
cSim
.
pOldPosq
[
pos
]
=
apos
;
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart2CM_kernel
()
#else
__global__
void
kLangevinUpdatePart2_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
__syncthreads
();
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#ifdef REMOVE_CM
float
mass
=
1
.
0
f
/
velocity
.
w
;
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
#endif;
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
#ifdef REMOVE_CM
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0
.
0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
#endif
}
platforms/cuda/src/kernels/kSettle.cu
View file @
3749e84b
...
@@ -33,8 +33,6 @@
...
@@ -33,8 +33,6 @@
//#include <fstream>
//#include <fstream>
using
namespace
std
;
using
namespace
std
;
#define DeltaShake
#include "gputypes.h"
#include "gputypes.h"
...
...
platforms/cuda/src/kernels/k
Update
ShakeH.cu
→
platforms/cuda/src/kernels/kShakeH.cu
100755 → 100644
View file @
3749e84b
/* -------------------------------------------------------------------------- *
/* -------------------------------------------------------------------------- *
* OpenMM *
* OpenMM *
* -------------------------------------------------------------------------- *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
* This program is free software: you can redistribute it and/or modify *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* (at your option) any later version. *
* *
* *
* This program is distributed in the hope that it will be useful, *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* GNU Lesser General Public License for more details. *
* *
* *
* You should have received a copy of the GNU Lesser General Public License *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdio.h>
#include <cuda.h>
#include <cuda.h>
#include <vector_functions.h>
#include <vector_functions.h>
#include <cstdlib>
#include <cstdlib>
#include <string>
#include <string>
#include <iostream>
#include <iostream>
//#include <fstream>
using
namespace
std
;
using
namespace
std
;
#include "gputypes.h"
#define DeltaShake
struct
Atom
#include "gputypes.h"
{
float3
rij1
;
struct
Atom
float3
rij2
;
{
float3
rij3
;
float3
rij1
;
float
M
;
float3
rij2
;
float
d2
;
float3
rij3
;
float
InvMassI
;
float
M
;
float
rij1sq
;
float
d2
;
float
rij2sq
;
float
InvMassI
;
float
rij3sq
;
float
rij1sq
;
};
float
rij2sq
;
float
rij3sq
;
};
static
__constant__
cudaGmxSimulation
cSim
;
void
SetShakeHSim
(
gpuContext
gpu
)
static
__constant__
cudaGmxSimulation
cSim
;
{
cudaError_t
status
;
void
SetUpdateShakeHSim
(
gpuContext
gpu
)
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
{
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
cudaError_t
status
;
}
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
void
GetShakeHSim
(
gpuContext
gpu
)
}
{
cudaError_t
status
;
void
GetUpdateShakeHSim
(
gpuContext
gpu
)
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
{
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
cudaError_t
status
;
}
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
__global__
void
kApplyFirstShake_kernel
()
}
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
__global__
void
kUpdatePart1_kernel
()
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ShakeConstraints
)
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
{
__syncthreads
();
int4
atomID
=
cSim
.
pShakeID
[
pos
];
float4
params
=
cSim
.
pShakeParameter
[
pos
];
while
(
pos
<
cSim
.
atoms
)
float4
apos
=
cSim
.
pOldPosq
[
atomID
.
x
];
{
float4
xpi
=
cSim
.
pPosqP
[
atomID
.
x
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
xpj1
=
cSim
.
pPosqP
[
atomID
.
y
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float4
apos2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
xpj2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
float4
apos
=
cSim
.
pPosq
[
pos
];
psA
->
InvMassI
=
params
.
x
;
float4
force
=
cSim
.
pForce4
[
pos
];
psA
->
M
=
params
.
y
;
psA
->
d2
=
params
.
z
;
float3
Vmh
;
float
invMassJ
=
params
.
w
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
if
(
atomID
.
z
!=
-
1
)
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
{
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
xpj2
=
cSim
.
pPosqP
[
atomID
.
z
];
float4
vVector
;
}
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
float4
apos3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
float4
xpj3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
if
(
atomID
.
w
!=
-
1
)
vVector
.
w
=
0.0
f
;
{
cSim
.
pvVector4
[
pos
]
=
vVector
;
apos3
=
cSim
.
pOldPosq
[
atomID
.
w
];
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
xpj3
=
cSim
.
pPosqP
[
atomID
.
w
];
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
}
vVector
.
x
-
cSim
.
EM
*
Vmh
.
x
;
float3
xi
,
xj1
,
xj2
,
xj3
;
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
xi
.
x
=
apos
.
x
;
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
xi
.
y
=
apos
.
y
;
vVector
.
y
-
xi
.
z
=
apos
.
z
;
cSim
.
EM
*
Vmh
.
y
;
xj1
.
x
=
apos1
.
x
;
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
xj1
.
y
=
apos1
.
y
;
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
xj1
.
z
=
apos1
.
z
;
vVector
.
z
-
xj2
.
x
=
apos2
.
x
;
cSim
.
EM
*
Vmh
.
z
;
xj2
.
y
=
apos2
.
y
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
xj2
.
z
=
apos2
.
z
;
#ifndef DeltaShake
xj3
.
x
=
apos3
.
x
;
apos
.
x
+=
velocity
.
x
*
cSim
.
fix1
;
xj3
.
y
=
apos3
.
y
;
apos
.
y
+=
velocity
.
y
*
cSim
.
fix1
;
xj3
.
z
=
apos3
.
z
;
apos
.
z
+=
velocity
.
z
*
cSim
.
fix1
;
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
#else
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
psA
->
rij2
.
x
=
xi
.
x
-
xj2
.
x
;
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
psA
->
rij2
.
y
=
xi
.
y
-
xj2
.
y
;
#endif
psA
->
rij2
.
z
=
xi
.
z
-
xj2
.
z
;
cSim
.
pPosqP
[
pos
]
=
apos
;
psA
->
rij3
.
x
=
xi
.
x
-
xj3
.
x
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
psA
->
rij3
.
y
=
xi
.
y
-
xj3
.
y
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
psA
->
rij3
.
z
=
xi
.
z
-
xj3
.
z
;
}
psA
->
rij1sq
=
psA
->
rij1
.
x
*
psA
->
rij1
.
x
+
psA
->
rij1
.
y
*
psA
->
rij1
.
y
+
psA
->
rij1
.
z
*
psA
->
rij1
.
z
;
}
psA
->
rij2sq
=
psA
->
rij2
.
x
*
psA
->
rij2
.
x
+
psA
->
rij2
.
y
*
psA
->
rij2
.
y
+
psA
->
rij2
.
z
*
psA
->
rij2
.
z
;
psA
->
rij3sq
=
psA
->
rij3
.
x
*
psA
->
rij3
.
x
+
psA
->
rij3
.
y
*
psA
->
rij3
.
y
+
psA
->
rij3
.
z
*
psA
->
rij3
.
z
;
__global__
void
kUpdatePart1CM_kernel
()
float
ld1
=
psA
->
d2
-
psA
->
rij1sq
;
{
float
ld2
=
psA
->
d2
-
psA
->
rij2sq
;
extern
__shared__
float3
sCM
[];
float
ld3
=
psA
->
d2
-
psA
->
rij3sq
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
bool
converged
=
false
;
float4
CM1
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
int
iteration
=
0
;
while
(
iteration
<
15
&&
!
converged
)
// Read CM outputs from previous step
{
unsigned
int
cpos
=
threadIdx
.
x
;
converged
=
true
;
#if 0
float3
rpij
;
float4 CM2 = { 0.0f, 0.0f, 0.0f, 0.0f };
rpij
.
x
=
xpi
.
x
-
xpj1
.
x
;
float4 CM3 = { 0.0f, 0.0f, 0.0f, 0.0f };
rpij
.
y
=
xpi
.
y
-
xpj1
.
y
;
float4 CM4 = { 0.0f, 0.0f, 0.0f, 0.0f };
rpij
.
z
=
xpi
.
z
-
xpj1
.
z
;
if (cpos < gridDim.x)
float
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
CM1 = cSim.pLinearMomentum[cpos];
float
rrpr
=
psA
->
rij1
.
x
*
rpij
.
x
+
psA
->
rij1
.
y
*
rpij
.
y
+
psA
->
rij1
.
z
*
rpij
.
z
;
cpos += gridDim.x;
float
diff
=
fabs
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if (cpos < gridDim.x)
if
(
diff
>=
1.0
f
)
CM2 = cSim.pLinearMomentum[cpos];
{
cpos += gridDim.x;
float
acor
=
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij1sq
);
if (cpos < gridDim.x)
float3
dr
;
CM3 = cSim.pLinearMomentum[cpos];
dr
.
x
=
psA
->
rij1
.
x
*
acor
;
cpos += gridDim.x;
dr
.
y
=
psA
->
rij1
.
y
*
acor
;
if (cpos < gridDim.x)
dr
.
z
=
psA
->
rij1
.
z
*
acor
;
CM4 = cSim.pLinearMomentum[cpos];
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
sCM[threadIdx.x].x = CM1.x + CM2.x + CM3.x + CM4.x;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
sCM[threadIdx.x].y = CM1.y + CM2.y + CM3.y + CM4.y;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
sCM[threadIdx.x].z = CM1.z + CM2.z + CM3.z + CM4.z;
xpj1
.
x
-=
dr
.
x
*
invMassJ
;
#else
xpj1
.
y
-=
dr
.
y
*
invMassJ
;
while
(
cpos
<
gridDim
.
x
)
xpj1
.
z
-=
dr
.
z
*
invMassJ
;
{
converged
=
false
;
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
}
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
if
(
atomID
.
z
!=
-
1
)
CM
.
z
+=
CM1
.
z
;
{
cpos
+=
blockDim
.
x
;
rpij
.
x
=
xpi
.
x
-
xpj2
.
x
;
}
rpij
.
y
=
xpi
.
y
-
xpj2
.
y
;
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
rpij
.
z
=
xpi
.
z
-
xpj2
.
z
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
rrpr
=
psA
->
rij2
.
x
*
rpij
.
x
+
psA
->
rij2
.
y
*
rpij
.
y
+
psA
->
rij2
.
z
*
rpij
.
z
;
#endif
diff
=
fabs
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
__syncthreads
();
if
(
diff
>=
1.0
f
)
{
// Reduce CM
float
acor
=
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij2sq
);
unsigned
int
offset
=
1
;
float3
dr
;
unsigned
int
mask
=
1
;
dr
.
x
=
psA
->
rij2
.
x
*
acor
;
while
(
offset
<
blockDim
.
x
)
dr
.
y
=
psA
->
rij2
.
y
*
acor
;
{
dr
.
z
=
psA
->
rij2
.
z
*
acor
;
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
{
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
xpj2
.
x
-=
dr
.
x
*
invMassJ
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
xpj2
.
y
-=
dr
.
y
*
invMassJ
;
}
xpj2
.
z
-=
dr
.
z
*
invMassJ
;
mask
=
2
*
mask
+
1
;
converged
=
false
;
offset
*=
2
;
}
__syncthreads
();
}
}
if
(
atomID
.
w
!=
-
1
)
while
(
pos
<
cSim
.
atoms
)
{
{
rpij
.
x
=
xpi
.
x
-
xpj3
.
x
;
float4
velocity
=
cSim
.
pVelm4
[
pos
];
rpij
.
y
=
xpi
.
y
-
xpj3
.
y
;
float4
xVector
=
cSim
.
pxVector4
[
pos
];
rpij
.
z
=
xpi
.
z
-
xpj3
.
z
;
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
rrpr
=
psA
->
rij3
.
x
*
rpij
.
x
+
psA
->
rij3
.
y
*
rpij
.
y
+
psA
->
rij3
.
z
*
rpij
.
z
;
float4
apos
=
cSim
.
pPosq
[
pos
];
diff
=
fabs
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
float4
force
=
cSim
.
pForce4
[
pos
];
if
(
diff
>=
1.0
f
)
{
float3
Vmh
;
float
acor
=
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij3sq
);
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
float3
dr
;
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
dr
.
x
=
psA
->
rij3
.
x
*
acor
;
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
dr
.
y
=
psA
->
rij3
.
y
*
acor
;
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
dr
.
z
=
psA
->
rij3
.
z
*
acor
;
float4
vVector
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
xpj3
.
x
-=
dr
.
x
*
invMassJ
;
vVector
.
w
=
0.0
f
;
xpj3
.
y
-=
dr
.
y
*
invMassJ
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
xpj3
.
z
-=
dr
.
z
*
invMassJ
;
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
converged
=
false
;
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
}
vVector
.
x
-
}
cSim
.
EM
*
Vmh
.
x
-
iteration
++
;
sCM
[
0
].
x
;
}
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
cSim
.
pPosqP
[
atomID
.
x
]
=
xpi
;
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
cSim
.
pPosqP
[
atomID
.
y
]
=
xpj1
;
vVector
.
y
-
if
(
atomID
.
z
!=
-
1
)
cSim
.
EM
*
Vmh
.
y
-
cSim
.
pPosqP
[
atomID
.
z
]
=
xpj2
;
sCM
[
0
].
y
;
if
(
atomID
.
w
!=
-
1
)
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
cSim
.
pPosqP
[
atomID
.
w
]
=
xpj3
;
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
vVector
.
z
-
pos
+=
blockDim
.
x
*
gridDim
.
x
;
cSim
.
EM
*
Vmh
.
z
-
}
sCM
[
0
].
z
;
}
cSim
.
pOldPosq
[
pos
]
=
apos
;
#ifndef DeltaShake
void
kApplyFirstShake
(
gpuContext
gpu
)
apos
.
x
+=
velocity
.
x
*
cSim
.
fix1
;
{
apos
.
y
+=
velocity
.
y
*
cSim
.
fix1
;
// printf("kApplyFirstShake\n");
apos
.
z
+=
velocity
.
z
*
cSim
.
fix1
;
if
(
gpu
->
sim
.
ShakeConstraints
>
0
)
#else
{
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
kApplyFirstShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
shake_threads_per_block
>>>
();
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
LAUNCHERROR
(
"kApplyFirstShake"
);
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
}
#endif
}
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
__global__
void
kApplySecondShake_kernel
()
pos
+=
blockDim
.
x
*
gridDim
.
x
;
{
}
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
}
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ShakeConstraints
)
{
void
kUpdatePart1
(
gpuContext
gpu
)
int4
atomID
=
cSim
.
pShakeID
[
pos
];
{
float4
params
=
cSim
.
pShakeParameter
[
pos
];
// printf("kUpdatePart1\n");
float4
apos
=
cSim
.
pOldPosq
[
atomID
.
x
];
#if 0
float4
xpi
=
cSim
.
pPosq
[
atomID
.
x
];
static int iteration = 0;
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
if (iteration == 0)
float4
xpj1
=
cSim
.
pPosq
[
atomID
.
y
];
{
float4
apos2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
gpu->psPosq4->Download();
float4
xpj2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
gpu->psVelm4->Download();
psA
->
InvMassI
=
params
.
x
;
printf("# %d atoms\n", gpu->natoms);
psA
->
M
=
params
.
y
;
for (int i = 0; i < gpu->natoms; i++)
psA
->
d2
=
params
.
z
;
{
float
invMassJ
=
params
.
w
;
printf("%5d %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
if
(
atomID
.
z
!=
-
1
)
gpu->psPosq4->_pSysStream[0][i].x, gpu->psPosq4->_pSysStream[0][i].y,
{
gpu->psPosq4->_pSysStream[0][i].z, gpu->psPosq4->_pSysStream[0][i].w,
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
gpu->psVelm4->_pSysStream[0][i].x, gpu->psVelm4->_pSysStream[0][i].y,
xpj2
=
cSim
.
pPosq
[
atomID
.
z
];
gpu->psVelm4->_pSysStream[0][i].z, gpu->psVelm4->_pSysStream[0][i].w
}
);
float4
apos3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
}
float4
xpj3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
}
if
(
atomID
.
w
!=
-
1
)
iteration++;
{
#endif
apos3
=
cSim
.
pOldPosq
[
atomID
.
w
];
#if 0
xpj3
=
cSim
.
pPosq
[
atomID
.
w
];
static const float KILO = 1e3; // Thousand
}
static const float BOLTZMANN = 1.380658e-23f; // (J/K)
static const float AVOGADRO = 6.0221367e23f; // ()
float3
xi
,
xj1
,
xj2
,
xj3
;
static const float RGAS = BOLTZMANN * AVOGADRO; // (J/(mol K))
xi
.
x
=
apos
.
x
;
static const float BOLTZ = (RGAS / KILO); // (kJ/(mol K))
xi
.
y
=
apos
.
y
;
static int iteration = 0;
xi
.
z
=
apos
.
z
;
xj1
.
x
=
apos1
.
x
;
// Check T
xj1
.
y
=
apos1
.
y
;
if (iteration % 1000 == 0)
xj1
.
z
=
apos1
.
z
;
{
xj2
.
x
=
apos2
.
x
;
gpu->psVelm4->Download();
xj2
.
y
=
apos2
.
y
;
float ke = 0.0f;
xj2
.
z
=
apos2
.
z
;
for (int i = 0; i < gpu->natoms; i++)
xj3
.
x
=
apos3
.
x
;
{
xj3
.
y
=
apos3
.
y
;
float vx = gpu->psVelm4->_pSysStream[0][i].x;
xj3
.
z
=
apos3
.
z
;
float vy = gpu->psVelm4->_pSysStream[0][i].y;
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
float vz = gpu->psVelm4->_pSysStream[0][i].z;
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
float m = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
ke += m * (vx * vx + vy * vy + vz * vz);
psA
->
rij2
.
x
=
xi
.
x
-
xj2
.
x
;
}
psA
->
rij2
.
y
=
xi
.
y
-
xj2
.
y
;
float T = ke / (BOLTZ * gpu->sim.degreesOfFreedom);
psA
->
rij2
.
z
=
xi
.
z
-
xj2
.
z
;
printf("Iteration %d, Temperature is %f\n", iteration, T);
psA
->
rij3
.
x
=
xi
.
x
-
xj3
.
x
;
}
psA
->
rij3
.
y
=
xi
.
y
-
xj3
.
y
;
iteration++;
psA
->
rij3
.
z
=
xi
.
z
-
xj3
.
z
;
#endif
psA
->
rij1sq
=
psA
->
rij1
.
x
*
psA
->
rij1
.
x
+
psA
->
rij1
.
y
*
psA
->
rij1
.
y
+
psA
->
rij1
.
z
*
psA
->
rij1
.
z
;
if
(
gpu
->
bRemoveCM
)
psA
->
rij2sq
=
psA
->
rij2
.
x
*
psA
->
rij2
.
x
+
psA
->
rij2
.
y
*
psA
->
rij2
.
y
+
psA
->
rij2
.
z
*
psA
->
rij2
.
z
;
{
psA
->
rij3sq
=
psA
->
rij3
.
x
*
psA
->
rij3
.
x
+
psA
->
rij3
.
y
*
psA
->
rij3
.
y
+
psA
->
rij3
.
z
*
psA
->
rij3
.
z
;
kUpdatePart1CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
float
ld1
=
psA
->
d2
-
psA
->
rij1sq
;
LAUNCHERROR
(
"kUpdatePart1CM"
);
float
ld2
=
psA
->
d2
-
psA
->
rij2sq
;
gpu
->
bRemoveCM
=
false
;
float
ld3
=
psA
->
d2
-
psA
->
rij3sq
;
#if 0
gpu->psLinearMomentum->Download();
bool
converged
=
false
;
gpu->psVelm4->Download();
int
iteration
=
0
;
float3 mv = {0.0f, 0.0f, 0.0f};
while
(
iteration
<
15
&&
!
converged
)
for (int i = 0; i < gpu->natoms; i++)
{
{
converged
=
true
;
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
float3
rpij
;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
rpij
.
x
=
xpi
.
x
-
xpj1
.
x
;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
rpij
.
y
=
xpi
.
y
-
xpj1
.
y
;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
rpij
.
z
=
xpi
.
z
-
xpj1
.
z
;
}
float
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
mv.x *= gpu->sim.inverseTotalMass;
float
rrpr
=
psA
->
rij1
.
x
*
rpij
.
x
+
psA
->
rij1
.
y
*
rpij
.
y
+
psA
->
rij1
.
z
*
rpij
.
z
;
mv.y *= gpu->sim.inverseTotalMass;
float
diff
=
fabs
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
mv.z *= gpu->sim.inverseTotalMass;
if
(
diff
>=
1.0
f
)
{
float3 mv1 = {0.0f, 0.0f, 0.0f};
float
acor
=
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij1sq
);
for (int i = 0; i < gpu->sim.blocks; i++)
float3
dr
;
{
dr
.
x
=
psA
->
rij1
.
x
*
acor
;
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
dr
.
y
=
psA
->
rij1
.
y
*
acor
;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
dr
.
z
=
psA
->
rij1
.
z
*
acor
;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
}
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
#endif
xpj1
.
x
-=
dr
.
x
*
invMassJ
;
}
xpj1
.
y
-=
dr
.
y
*
invMassJ
;
else
xpj1
.
z
-=
dr
.
z
*
invMassJ
;
{
converged
=
false
;
kUpdatePart1_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
}
LAUNCHERROR
(
"kUpdatePart1"
);
}
if
(
atomID
.
z
!=
-
1
)
}
{
rpij
.
x
=
xpi
.
x
-
xpj2
.
x
;
__global__
void
kApplyFirstShake_kernel
()
rpij
.
y
=
xpi
.
y
-
xpj2
.
y
;
{
rpij
.
z
=
xpi
.
z
-
xpj2
.
z
;
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
rrpr
=
psA
->
rij2
.
x
*
rpij
.
x
+
psA
->
rij2
.
y
*
rpij
.
y
+
psA
->
rij2
.
z
*
rpij
.
z
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
diff
=
fabs
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
while
(
pos
<
cSim
.
ShakeConstraints
)
if
(
diff
>=
1.0
f
)
{
{
int4
atomID
=
cSim
.
pShakeID
[
pos
];
float
acor
=
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij2sq
);
float4
params
=
cSim
.
pShakeParameter
[
pos
];
float3
dr
;
float4
apos
=
cSim
.
pOldPosq
[
atomID
.
x
];
dr
.
x
=
psA
->
rij2
.
x
*
acor
;
float4
xpi
=
cSim
.
pPosqP
[
atomID
.
x
];
dr
.
y
=
psA
->
rij2
.
y
*
acor
;
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
dr
.
z
=
psA
->
rij2
.
z
*
acor
;
float4
xpj1
=
cSim
.
pPosqP
[
atomID
.
y
];
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
float4
apos2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
float4
xpj2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
psA
->
InvMassI
=
params
.
x
;
xpj2
.
x
-=
dr
.
x
*
invMassJ
;
psA
->
M
=
params
.
y
;
xpj2
.
y
-=
dr
.
y
*
invMassJ
;
psA
->
d2
=
params
.
z
;
xpj2
.
z
-=
dr
.
z
*
invMassJ
;
float
invMassJ
=
params
.
w
;
converged
=
false
;
if
(
atomID
.
z
!=
-
1
)
}
{
}
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
xpj2
=
cSim
.
pPosqP
[
atomID
.
z
];
if
(
atomID
.
w
!=
-
1
)
}
{
float4
apos3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
rpij
.
x
=
xpi
.
x
-
xpj3
.
x
;
float4
xpj3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
rpij
.
y
=
xpi
.
y
-
xpj3
.
y
;
if
(
atomID
.
w
!=
-
1
)
rpij
.
z
=
xpi
.
z
-
xpj3
.
z
;
{
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
apos3
=
cSim
.
pOldPosq
[
atomID
.
w
];
rrpr
=
psA
->
rij3
.
x
*
rpij
.
x
+
psA
->
rij3
.
y
*
rpij
.
y
+
psA
->
rij3
.
z
*
rpij
.
z
;
xpj3
=
cSim
.
pPosqP
[
atomID
.
w
];
diff
=
fabs
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
}
if
(
diff
>=
1.0
f
)
{
float3
xi
,
xj1
,
xj2
,
xj3
;
float
acor
=
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij3sq
);
xi
.
x
=
apos
.
x
;
float3
dr
;
xi
.
y
=
apos
.
y
;
dr
.
x
=
psA
->
rij3
.
x
*
acor
;
xi
.
z
=
apos
.
z
;
dr
.
y
=
psA
->
rij3
.
y
*
acor
;
xj1
.
x
=
apos1
.
x
;
dr
.
z
=
psA
->
rij3
.
z
*
acor
;
xj1
.
y
=
apos1
.
y
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xj1
.
z
=
apos1
.
z
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xj2
.
x
=
apos2
.
x
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xj2
.
y
=
apos2
.
y
;
xpj3
.
x
-=
dr
.
x
*
invMassJ
;
xj2
.
z
=
apos2
.
z
;
xpj3
.
y
-=
dr
.
y
*
invMassJ
;
xj3
.
x
=
apos3
.
x
;
xpj3
.
z
-=
dr
.
z
*
invMassJ
;
xj3
.
y
=
apos3
.
y
;
converged
=
false
;
xj3
.
z
=
apos3
.
z
;
}
#ifndef DeltaShake
}
xpi
.
x
-=
xi
.
x
;
iteration
++
;
xpi
.
y
-=
xi
.
y
;
}
xpi
.
z
-=
xi
.
z
;
xpj1
.
x
-=
xj1
.
x
;
xpi
.
x
+=
xi
.
x
;
xpj1
.
y
-=
xj1
.
y
;
xpi
.
y
+=
xi
.
y
;
xpj1
.
z
-=
xj1
.
z
;
xpi
.
z
+=
xi
.
z
;
xpj2
.
x
-=
xj2
.
x
;
xpj1
.
x
+=
xj1
.
x
;
xpj2
.
y
-=
xj2
.
y
;
xpj1
.
y
+=
xj1
.
y
;
xpj2
.
z
-=
xj2
.
z
;
xpj1
.
z
+=
xj1
.
z
;
xpj3
.
x
-=
xj3
.
x
;
xpj2
.
x
+=
xj2
.
x
;
xpj3
.
y
-=
xj3
.
y
;
xpj2
.
y
+=
xj2
.
y
;
xpj3
.
z
-=
xj3
.
z
;
xpj2
.
z
+=
xj2
.
z
;
#endif
xpj3
.
x
+=
xj3
.
x
;
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
xpj3
.
y
+=
xj3
.
y
;
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
xpj3
.
z
+=
xj3
.
z
;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
psA
->
rij2
.
x
=
xi
.
x
-
xj2
.
x
;
cSim
.
pPosq
[
atomID
.
x
]
=
xpi
;
psA
->
rij2
.
y
=
xi
.
y
-
xj2
.
y
;
cSim
.
pPosq
[
atomID
.
y
]
=
xpj1
;
psA
->
rij2
.
z
=
xi
.
z
-
xj2
.
z
;
psA
->
rij3
.
x
=
xi
.
x
-
xj3
.
x
;
if
(
atomID
.
z
!=
-
1
)
psA
->
rij3
.
y
=
xi
.
y
-
xj3
.
y
;
cSim
.
pPosq
[
atomID
.
z
]
=
xpj2
;
psA
->
rij3
.
z
=
xi
.
z
-
xj3
.
z
;
psA
->
rij1sq
=
psA
->
rij1
.
x
*
psA
->
rij1
.
x
+
psA
->
rij1
.
y
*
psA
->
rij1
.
y
+
psA
->
rij1
.
z
*
psA
->
rij1
.
z
;
if
(
atomID
.
w
!=
-
1
)
psA
->
rij2sq
=
psA
->
rij2
.
x
*
psA
->
rij2
.
x
+
psA
->
rij2
.
y
*
psA
->
rij2
.
y
+
psA
->
rij2
.
z
*
psA
->
rij2
.
z
;
cSim
.
pPosq
[
atomID
.
w
]
=
xpj3
;
psA
->
rij3sq
=
psA
->
rij3
.
x
*
psA
->
rij3
.
x
+
psA
->
rij3
.
y
*
psA
->
rij3
.
y
+
psA
->
rij3
.
z
*
psA
->
rij3
.
z
;
float
ld1
=
psA
->
d2
-
psA
->
rij1sq
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
float
ld2
=
psA
->
d2
-
psA
->
rij2sq
;
}
float
ld3
=
psA
->
d2
-
psA
->
rij3sq
;
}
__global__
void
kApplyNoShake_kernel
()
bool
converged
=
false
;
{
int
iteration
=
0
;
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
iteration
<
15
&&
!
converged
)
while
(
pos
<
cSim
.
NonShakeConstraints
)
{
{
converged
=
true
;
int
atomID
=
cSim
.
pNonShakeID
[
pos
];
float3
rpij
;
float4
apos
=
cSim
.
pOldPosq
[
atomID
];
rpij
.
x
=
xpi
.
x
-
xpj1
.
x
;
float4
xpi
=
cSim
.
pPosq
[
atomID
];
rpij
.
y
=
xpi
.
y
-
xpj1
.
y
;
xpi
.
x
+=
apos
.
x
;
rpij
.
z
=
xpi
.
z
-
xpj1
.
z
;
xpi
.
y
+=
apos
.
y
;
float
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
xpi
.
z
+=
apos
.
z
;
float
rrpr
=
psA
->
rij1
.
x
*
rpij
.
x
+
psA
->
rij1
.
y
*
rpij
.
y
+
psA
->
rij1
.
z
*
rpij
.
z
;
cSim
.
pPosq
[
atomID
]
=
xpi
;
float
diff
=
fabs
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
pos
+=
blockDim
.
x
*
gridDim
.
x
;
{
}
float
acor
=
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij1sq
);
}
float3
dr
;
dr
.
x
=
psA
->
rij1
.
x
*
acor
;
dr
.
y
=
psA
->
rij1
.
y
*
acor
;
void
kApplySecondShake
(
gpuContext
gpu
)
dr
.
z
=
psA
->
rij1
.
z
*
acor
;
{
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
// printf("kApplySecondShake\n");
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
if
(
gpu
->
sim
.
ShakeConstraints
>
0
)
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
{
xpj1
.
x
-=
dr
.
x
*
invMassJ
;
kApplySecondShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
shake_threads_per_block
>>>
();
xpj1
.
y
-=
dr
.
y
*
invMassJ
;
LAUNCHERROR
(
"kApplySecondShake"
);
xpj1
.
z
-=
dr
.
z
*
invMassJ
;
}
converged
=
false
;
}
// handle non-Shake atoms
if
(
atomID
.
z
!=
-
1
)
if
(
gpu
->
sim
.
NonShakeConstraints
>
0
)
{
{
rpij
.
x
=
xpi
.
x
-
xpj2
.
x
;
kApplyNoShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonshake_threads_per_block
>>>
();
rpij
.
y
=
xpi
.
y
-
xpj2
.
y
;
LAUNCHERROR
(
"kApplyNoShake"
);
rpij
.
z
=
xpi
.
z
-
xpj2
.
z
;
}
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
}
rrpr
=
psA
->
rij2
.
x
*
rpij
.
x
+
psA
->
rij2
.
y
*
rpij
.
y
+
psA
->
rij2
.
z
*
rpij
.
z
;
diff
=
fabs
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
{
float
acor
=
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij2sq
);
float3
dr
;
dr
.
x
=
psA
->
rij2
.
x
*
acor
;
dr
.
y
=
psA
->
rij2
.
y
*
acor
;
dr
.
z
=
psA
->
rij2
.
z
*
acor
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xpj2
.
x
-=
dr
.
x
*
invMassJ
;
xpj2
.
y
-=
dr
.
y
*
invMassJ
;
xpj2
.
z
-=
dr
.
z
*
invMassJ
;
converged
=
false
;
}
}
if
(
atomID
.
w
!=
-
1
)
{
rpij
.
x
=
xpi
.
x
-
xpj3
.
x
;
rpij
.
y
=
xpi
.
y
-
xpj3
.
y
;
rpij
.
z
=
xpi
.
z
-
xpj3
.
z
;
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
rrpr
=
psA
->
rij3
.
x
*
rpij
.
x
+
psA
->
rij3
.
y
*
rpij
.
y
+
psA
->
rij3
.
z
*
rpij
.
z
;
diff
=
fabs
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
{
float
acor
=
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij3sq
);
float3
dr
;
dr
.
x
=
psA
->
rij3
.
x
*
acor
;
dr
.
y
=
psA
->
rij3
.
y
*
acor
;
dr
.
z
=
psA
->
rij3
.
z
*
acor
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xpj3
.
x
-=
dr
.
x
*
invMassJ
;
xpj3
.
y
-=
dr
.
y
*
invMassJ
;
xpj3
.
z
-=
dr
.
z
*
invMassJ
;
converged
=
false
;
}
}
iteration
++
;
}
#ifndef DeltaShake
xpi
.
x
+=
xi
.
x
;
xpi
.
y
+=
xi
.
y
;
xpi
.
z
+=
xi
.
z
;
xpj1
.
x
+=
xj1
.
x
;
xpj1
.
y
+=
xj1
.
y
;
xpj1
.
z
+=
xj1
.
z
;
xpj2
.
x
+=
xj2
.
x
;
xpj2
.
y
+=
xj2
.
y
;
xpj2
.
z
+=
xj2
.
z
;
xpj3
.
x
+=
xj3
.
x
;
xpj3
.
y
+=
xj3
.
y
;
xpj3
.
z
+=
xj3
.
z
;
#endif
cSim
.
pPosqP
[
atomID
.
x
]
=
xpi
;
cSim
.
pPosqP
[
atomID
.
y
]
=
xpj1
;
if
(
atomID
.
z
!=
-
1
)
cSim
.
pPosqP
[
atomID
.
z
]
=
xpj2
;
if
(
atomID
.
w
!=
-
1
)
cSim
.
pPosqP
[
atomID
.
w
]
=
xpj3
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kApplyFirstShake
(
gpuContext
gpu
)
{
// printf("kApplyFirstShake\n");
if
(
gpu
->
sim
.
ShakeConstraints
>
0
)
{
kApplyFirstShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
shake_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplyFirstShake"
);
}
}
__global__
void
kUpdatePart2_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
__syncthreads
();
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
#ifndef DeltaShake
float4
apos
=
cSim
.
pPosq
[
pos
];
#endif
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
#ifdef DeltaShake
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#else
velocity
.
x
=
(
xPrime
.
x
-
apos
.
x
)
*
cSim
.
oneOverFix1
;
velocity
.
y
=
(
xPrime
.
y
-
apos
.
y
)
*
cSim
.
oneOverFix1
;
velocity
.
z
=
(
xPrime
.
z
-
apos
.
z
)
*
cSim
.
oneOverFix1
;
#endif
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
}
__global__
void
kUpdatePart2CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
__syncthreads
();
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
#ifndef DeltaShake
float4
apos
=
cSim
.
pPosq
[
pos
];
#endif
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
mass
=
1.0
f
/
velocity
.
w
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
#ifdef DeltaShake
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#else
velocity
.
x
=
(
xPrime
.
x
-
apos
.
x
)
*
cSim
.
oneOverFix1
;
velocity
.
y
=
(
xPrime
.
y
-
apos
.
y
)
*
cSim
.
oneOverFix1
;
velocity
.
z
=
(
xPrime
.
z
-
apos
.
z
)
*
cSim
.
oneOverFix1
;
#endif
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0.0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
}
extern
void
kGenerateRandoms
(
gpuContext
gpu
);
void
kUpdatePart2
(
gpuContext
gpu
)
{
// printf("kUpdatePart2\n");
if
(
gpu
->
bCalculateCM
)
{
kUpdatePart2CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kUpdatePart2CM"
);
gpu
->
bCalculateCM
=
false
;
gpu
->
bRemoveCM
=
true
;
#if 0
gpu->psLinearMomentum->Download();
gpu->psVelm4->Download();
float3 mv = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->natoms; i++)
{
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
}
mv.x *= gpu->sim.inverseTotalMass;
mv.y *= gpu->sim.inverseTotalMass;
mv.z *= gpu->sim.inverseTotalMass;
float3 mv1 = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->sim.blocks; i++)
{
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
}
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
#endif
}
else
{
kUpdatePart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kUpdatePart2"
);
}
// Update randoms if necessary
gpu
->
iterations
++
;
if
(
gpu
->
iterations
==
gpu
->
sim
.
randomIterations
)
{
kGenerateRandoms
(
gpu
);
gpu
->
iterations
=
0
;
}
}
__global__
void
kApplySecondShake_kernel
()
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
Atom
*
psA
=
&
sA
[
threadIdx
.
x
];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
ShakeConstraints
)
{
int4
atomID
=
cSim
.
pShakeID
[
pos
];
float4
params
=
cSim
.
pShakeParameter
[
pos
];
float4
apos
=
cSim
.
pOldPosq
[
atomID
.
x
];
float4
xpi
=
cSim
.
pPosq
[
atomID
.
x
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xpj1
=
cSim
.
pPosq
[
atomID
.
y
];
float4
apos2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
float4
xpj2
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
psA
->
InvMassI
=
params
.
x
;
psA
->
M
=
params
.
y
;
psA
->
d2
=
params
.
z
;
float
invMassJ
=
params
.
w
;
if
(
atomID
.
z
!=
-
1
)
{
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
xpj2
=
cSim
.
pPosq
[
atomID
.
z
];
}
float4
apos3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
float4
xpj3
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
if
(
atomID
.
w
!=
-
1
)
{
apos3
=
cSim
.
pOldPosq
[
atomID
.
w
];
xpj3
=
cSim
.
pPosq
[
atomID
.
w
];
}
float3
xi
,
xj1
,
xj2
,
xj3
;
xi
.
x
=
apos
.
x
;
xi
.
y
=
apos
.
y
;
xi
.
z
=
apos
.
z
;
xj1
.
x
=
apos1
.
x
;
xj1
.
y
=
apos1
.
y
;
xj1
.
z
=
apos1
.
z
;
xj2
.
x
=
apos2
.
x
;
xj2
.
y
=
apos2
.
y
;
xj2
.
z
=
apos2
.
z
;
xj3
.
x
=
apos3
.
x
;
xj3
.
y
=
apos3
.
y
;
xj3
.
z
=
apos3
.
z
;
#ifndef DeltaShake
xpi
.
x
-=
xi
.
x
;
xpi
.
y
-=
xi
.
y
;
xpi
.
z
-=
xi
.
z
;
xpj1
.
x
-=
xj1
.
x
;
xpj1
.
y
-=
xj1
.
y
;
xpj1
.
z
-=
xj1
.
z
;
xpj2
.
x
-=
xj2
.
x
;
xpj2
.
y
-=
xj2
.
y
;
xpj2
.
z
-=
xj2
.
z
;
xpj3
.
x
-=
xj3
.
x
;
xpj3
.
y
-=
xj3
.
y
;
xpj3
.
z
-=
xj3
.
z
;
#endif
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
psA
->
rij2
.
x
=
xi
.
x
-
xj2
.
x
;
psA
->
rij2
.
y
=
xi
.
y
-
xj2
.
y
;
psA
->
rij2
.
z
=
xi
.
z
-
xj2
.
z
;
psA
->
rij3
.
x
=
xi
.
x
-
xj3
.
x
;
psA
->
rij3
.
y
=
xi
.
y
-
xj3
.
y
;
psA
->
rij3
.
z
=
xi
.
z
-
xj3
.
z
;
psA
->
rij1sq
=
psA
->
rij1
.
x
*
psA
->
rij1
.
x
+
psA
->
rij1
.
y
*
psA
->
rij1
.
y
+
psA
->
rij1
.
z
*
psA
->
rij1
.
z
;
psA
->
rij2sq
=
psA
->
rij2
.
x
*
psA
->
rij2
.
x
+
psA
->
rij2
.
y
*
psA
->
rij2
.
y
+
psA
->
rij2
.
z
*
psA
->
rij2
.
z
;
psA
->
rij3sq
=
psA
->
rij3
.
x
*
psA
->
rij3
.
x
+
psA
->
rij3
.
y
*
psA
->
rij3
.
y
+
psA
->
rij3
.
z
*
psA
->
rij3
.
z
;
float
ld1
=
psA
->
d2
-
psA
->
rij1sq
;
float
ld2
=
psA
->
d2
-
psA
->
rij2sq
;
float
ld3
=
psA
->
d2
-
psA
->
rij3sq
;
bool
converged
=
false
;
int
iteration
=
0
;
while
(
iteration
<
15
&&
!
converged
)
{
converged
=
true
;
float3
rpij
;
rpij
.
x
=
xpi
.
x
-
xpj1
.
x
;
rpij
.
y
=
xpi
.
y
-
xpj1
.
y
;
rpij
.
z
=
xpi
.
z
-
xpj1
.
z
;
float
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
float
rrpr
=
psA
->
rij1
.
x
*
rpij
.
x
+
psA
->
rij1
.
y
*
rpij
.
y
+
psA
->
rij1
.
z
*
rpij
.
z
;
float
diff
=
fabs
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
{
float
acor
=
(
ld1
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij1sq
);
float3
dr
;
dr
.
x
=
psA
->
rij1
.
x
*
acor
;
dr
.
y
=
psA
->
rij1
.
y
*
acor
;
dr
.
z
=
psA
->
rij1
.
z
*
acor
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xpj1
.
x
-=
dr
.
x
*
invMassJ
;
xpj1
.
y
-=
dr
.
y
*
invMassJ
;
xpj1
.
z
-=
dr
.
z
*
invMassJ
;
converged
=
false
;
}
if
(
atomID
.
z
!=
-
1
)
{
rpij
.
x
=
xpi
.
x
-
xpj2
.
x
;
rpij
.
y
=
xpi
.
y
-
xpj2
.
y
;
rpij
.
z
=
xpi
.
z
-
xpj2
.
z
;
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
rrpr
=
psA
->
rij2
.
x
*
rpij
.
x
+
psA
->
rij2
.
y
*
rpij
.
y
+
psA
->
rij2
.
z
*
rpij
.
z
;
diff
=
fabs
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
{
float
acor
=
(
ld2
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij2sq
);
float3
dr
;
dr
.
x
=
psA
->
rij2
.
x
*
acor
;
dr
.
y
=
psA
->
rij2
.
y
*
acor
;
dr
.
z
=
psA
->
rij2
.
z
*
acor
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xpj2
.
x
-=
dr
.
x
*
invMassJ
;
xpj2
.
y
-=
dr
.
y
*
invMassJ
;
xpj2
.
z
-=
dr
.
z
*
invMassJ
;
converged
=
false
;
}
}
if
(
atomID
.
w
!=
-
1
)
{
rpij
.
x
=
xpi
.
x
-
xpj3
.
x
;
rpij
.
y
=
xpi
.
y
-
xpj3
.
y
;
rpij
.
z
=
xpi
.
z
-
xpj3
.
z
;
rpsqij
=
rpij
.
x
*
rpij
.
x
+
rpij
.
y
*
rpij
.
y
+
rpij
.
z
*
rpij
.
z
;
rrpr
=
psA
->
rij3
.
x
*
rpij
.
x
+
psA
->
rij3
.
y
*
rpij
.
y
+
psA
->
rij3
.
z
*
rpij
.
z
;
diff
=
fabs
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
/
(
psA
->
d2
*
cSim
.
shakeTolerance
);
if
(
diff
>=
1.0
f
)
{
float
acor
=
(
ld3
-
2.0
f
*
rrpr
-
rpsqij
)
*
psA
->
M
/
(
rrpr
+
psA
->
rij3sq
);
float3
dr
;
dr
.
x
=
psA
->
rij3
.
x
*
acor
;
dr
.
y
=
psA
->
rij3
.
y
*
acor
;
dr
.
z
=
psA
->
rij3
.
z
*
acor
;
xpi
.
x
+=
dr
.
x
*
psA
->
InvMassI
;
xpi
.
y
+=
dr
.
y
*
psA
->
InvMassI
;
xpi
.
z
+=
dr
.
z
*
psA
->
InvMassI
;
xpj3
.
x
-=
dr
.
x
*
invMassJ
;
xpj3
.
y
-=
dr
.
y
*
invMassJ
;
xpj3
.
z
-=
dr
.
z
*
invMassJ
;
converged
=
false
;
}
}
iteration
++
;
}
xpi
.
x
+=
xi
.
x
;
xpi
.
y
+=
xi
.
y
;
xpi
.
z
+=
xi
.
z
;
xpj1
.
x
+=
xj1
.
x
;
xpj1
.
y
+=
xj1
.
y
;
xpj1
.
z
+=
xj1
.
z
;
xpj2
.
x
+=
xj2
.
x
;
xpj2
.
y
+=
xj2
.
y
;
xpj2
.
z
+=
xj2
.
z
;
xpj3
.
x
+=
xj3
.
x
;
xpj3
.
y
+=
xj3
.
y
;
xpj3
.
z
+=
xj3
.
z
;
cSim
.
pPosq
[
atomID
.
x
]
=
xpi
;
cSim
.
pPosq
[
atomID
.
y
]
=
xpj1
;
if
(
atomID
.
z
!=
-
1
)
cSim
.
pPosq
[
atomID
.
z
]
=
xpj2
;
if
(
atomID
.
w
!=
-
1
)
cSim
.
pPosq
[
atomID
.
w
]
=
xpj3
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kApplyNoShake_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
NonShakeConstraints
)
{
int
atomID
=
cSim
.
pNonShakeID
[
pos
];
float4
apos
=
cSim
.
pOldPosq
[
atomID
];
float4
xpi
=
cSim
.
pPosq
[
atomID
];
xpi
.
x
+=
apos
.
x
;
xpi
.
y
+=
apos
.
y
;
xpi
.
z
+=
apos
.
z
;
cSim
.
pPosq
[
atomID
]
=
xpi
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kCPUShake2
(
gpuContext
gpu
)
{
}
void
kApplySecondShake
(
gpuContext
gpu
)
{
// printf("kApplySecondShake\n");
// kCPUShake2(gpu);
if
(
gpu
->
sim
.
ShakeConstraints
>
0
)
{
kApplySecondShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
shake_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplySecondShake"
);
}
// handle non-Shake atoms
#ifdef DeltaShake
if
(
gpu
->
sim
.
NonShakeConstraints
>
0
)
{
//fprintf( gpu->log, "kApplyNoShake_kernel %d %d \n", gpu->sim.blocks, gpu->sim.nonshake_threads_per_block); fflush( gpu->log );
kApplyNoShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonshake_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplyNoShake"
);
}
#endif
}
platforms/cuda/src/kernels/kVerletUpdate.cu
View file @
3749e84b
...
@@ -35,8 +35,6 @@ using namespace std;
...
@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#include "gputypes.h"
#define DeltaShake
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
void
SetVerletUpdateSim
(
gpuContext
gpu
)
void
SetVerletUpdateSim
(
gpuContext
gpu
)
...
@@ -53,102 +51,11 @@ void GetVerletUpdateSim(gpuContext gpu)
...
@@ -53,102 +51,11 @@ void GetVerletUpdateSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
}
__global__
void
kVerletUpdatePart1_kernel
()
// Include versions of the kernels with and with center of mass motion removal.
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
;
#ifndef DeltaShake
apos
.
x
+=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
+=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
+=
velocity
.
z
*
cSim
.
deltaT
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kVerletUpdatePart1CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float4
CM1
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
-
sCM
[
0
].
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
-
sCM
[
0
].
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
-
sCM
[
0
].
z
;
#ifndef DeltaShake
#include "kVerletUpdate.h"
apos
.
x
+=
velocity
.
x
*
cSim
.
deltaT
;
#define REMOVE_CM
apos
.
y
+=
velocity
.
y
*
cSim
.
deltaT
;
#include "kVerletUpdate.h"
apos
.
z
+=
velocity
.
z
*
cSim
.
deltaT
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kVerletUpdatePart1
(
gpuContext
gpu
)
void
kVerletUpdatePart1
(
gpuContext
gpu
)
{
{
...
@@ -166,105 +73,6 @@ void kVerletUpdatePart1(gpuContext gpu)
...
@@ -166,105 +73,6 @@ void kVerletUpdatePart1(gpuContext gpu)
}
}
}
}
__global__
void
kVerletUpdatePart2_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kVerletUpdatePart2CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float
mass
=
1.0
f
/
velocity
.
w
;
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#endif
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0.0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
}
void
kVerletUpdatePart2
(
gpuContext
gpu
)
void
kVerletUpdatePart2
(
gpuContext
gpu
)
{
{
// printf("kVerletUpdatePart2\n");
// printf("kVerletUpdatePart2\n");
...
...
platforms/cuda/src/kernels/kVerletUpdate.h
0 → 100644
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Verlet integration. It is included
* several times in kVerletUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart1CM_kernel
()
#else
__global__
void
kVerletUpdatePart1_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
float4
CM1
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
;
#ifdef REMOVE_CM
velocity
.
x
-=
sCM
[
0
].
x
;
velocity
.
y
-=
sCM
[
0
].
y
;
velocity
.
z
-=
sCM
[
0
].
z
;
#endif
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart2CM_kernel
()
#else
__global__
void
kVerletUpdatePart2_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#ifdef REMOVE_CM
float
mass
=
1
.
0
f
/
velocity
.
w
;
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
#ifdef REMOVE_CM
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0
.
0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment