/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include
#include
#include
#include
#include
#include
#include
using namespace std;
#include "gputypes.h"
#include "cudatypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct Atom {
float x;
float y;
float z;
float q;
float sig;
float eps;
float fx;
float fy;
float fz;
};
static __constant__ cudaGmxSimulation cSim;
void SetCalculateCDLJForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateCDLJForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateCDLJForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCDLJForces.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCDLJForces.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJForces.h"
// Include version of the kernel with Ewald method
// Real Space Ewald summation utilizes almost the same kernel as Periodic
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##EwaldDirect##b
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldDirectByWarp##b
#include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel
#include "kCalculateCDLJEwaldReciprocal.h"
#include "kCalculateCDLJEwaldFastReciprocal.h"
__global__ extern void kCalculateCDLJCutoffForces_12_kernel();
void kCalculateCDLJForces(gpuContext gpu)
{
// printf("kCalculateCDLJCutoffForces\n");
CUDPPResult result;
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJN2ByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateCDLJN2Forces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCDLJN2Forces");
break;
case CUTOFF:
kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsCutoff");
kFindBlocksWithInteractionsCutoff_kernel<<sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsCutoff");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksCutoff_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJCutoffByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJCutoffForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJCutoffForces");
break;
case PERIODIC:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksPeriodic_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJPeriodicByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJPeriodicForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJPeriodicForces");
break;
case EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect");
kFindBlocksWithInteractionsEwaldDirect_kernel<<sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksEwaldDirect_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJEwaldDirectForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
if (false)
{
// O(N2) Ewald summation
kCalculateCDLJEwaldReciprocalForces_kernel<<sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateCDLJEwaldReciprocalForces");
}
else
{
// O(N3/2) Ewald summation
kCalculateEwaldFastEikr_kernel<<sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastEikr");
kCalculateEwaldFastCosSinSums_kernel<<sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
}
}
}