Commit ff0a69f9 authored by Peter Eastman's avatar Peter Eastman
Browse files

Deleted lots of obsolete code.

parent a56ce87d
......@@ -39,10 +39,7 @@ extern void kGenerateRandoms(gpuContext gpu);
// Main loop
extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
extern void kCalculateCDLJObcGbsaForces1_12(gpuContext gpu);
extern void kCalculateCDLJForces(gpuContext gpu);
extern void kCalculateObcGbsaForces1(gpuContext gpu);
extern void kCalculateObcGbsaForces1_12(gpuContext gpu);
extern void kReduceObcGbsaBornForces(gpuContext gpu);
extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern void kCalculateLocalForces(gpuContext gpu);
......@@ -72,10 +69,6 @@ extern void SetCalculateLocalForcesSim(gpuContext gpu);
extern void GetCalculateLocalForcesSim(gpuContext gpu);
extern void SetCalculateObcGbsaBornSumSim(gpuContext gpu);
extern void GetCalculateObcGbsaBornSumSim(gpuContext gpu);
extern void SetCalculateObcGbsaForces1Sim(gpuContext gpu);
extern void GetCalculateObcGbsaForces1Sim(gpuContext gpu);
extern void SetCalculateObcGbsaForces1_12Sim(gpuContext gpu);
extern void GetCalculateObcGbsaForces1_12Sim(gpuContext gpu);
extern void SetCalculateObcGbsaForces2Sim(gpuContext gpu);
extern void GetCalculateObcGbsaForces2Sim(gpuContext gpu);
extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
......
......@@ -55,78 +55,6 @@ using namespace std;
using OpenMM::OpenMMException;
#ifdef WIN32
typedef unsigned __int64 u64;
typedef signed __int64 s64;
#else
typedef uint64_t u64;
typedef int64_t s64;
#endif
typedef unsigned int u32;
typedef float f32;
typedef double f64;
typedef char ascii;
typedef char utf8;
typedef unsigned char u8;
typedef signed char s8;
typedef unsigned short u16;
typedef signed short s16;
typedef struct
{
u8 type[4];
f32 charge;
f32 radius;
} FAH_ATOM;
typedef struct
{
u32 a; /* rule: a < b */
u32 b;
} FAH_BOND;
typedef struct
{
f32 x;
f32 y;
f32 z;
} FAH_XYZ;
typedef struct
{
u32 magic;
u32 version;
utf8 name[64];
s64 timestamp;
u64 iterations;
u32 frames;
u32 atom_count;
u32 bond_count;
/* v2 */
utf8 user_name[64];
utf8 user_team[16];
utf8 user_done[16];
} FAH_INFO;
typedef struct
{
u32 magic;
u32 version;
s64 timestamp;
u64 iterations_done;
u32 frames_done;
f32 energy;
f32 temperature;
} FAH_CURRENT;
typedef struct
{
FAH_INFO info;
FAH_CURRENT current;
FAH_ATOM * atoms;
FAH_BOND * bonds;
FAH_XYZ * xyz;
} PROTEIN;
struct ShakeCluster {
int centralID;
int peripheralID[3];
......@@ -196,43 +124,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DeltaShake
extern "C"
int gpuReadBondParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[512];
int bonds;
infile >> bonds;
infile.getline(buff, 512);
vector<int> atom1(bonds);
vector<int> atom2(bonds);
vector<float> length(bonds);
vector<float> k(bonds);
for (int i = 0; i < bonds; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
length[i] >>
k[i];
}
gpuSetBondParameters(gpu, atom1, atom2, length, k);
return bonds;
}
else
{
cout << "Error opening harmonic bond parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& length, const vector<float>& k)
{
......@@ -268,45 +159,6 @@ void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector
psBondParameter->Upload();
}
extern "C"
int gpuReadBondAngleParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[512];
int bond_angles;
infile >> bond_angles;
infile.getline(buff, 512);
vector<int> atom1(bond_angles);
vector<int> atom2(bond_angles);
vector<int> atom3(bond_angles);
vector<float> angle(bond_angles);
vector<float> k(bond_angles);
for (int i = 0; i < bond_angles; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
atom3[i] >>
angle[i] >>
k[i];
}
gpuSetBondAngleParameters(gpu, atom1, atom2, atom3, angle, k);
return bond_angles;
}
else
{
cout << "Error opening harmonic bond angle parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3,
const vector<float>& angle, const vector<float>& k)
......@@ -352,48 +204,6 @@ void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const v
psBondAngleParameter->Upload();
}
extern "C"
int gpuReadDihedralParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[512];
int dihedrals;
infile >> dihedrals;
infile.getline(buff, 512);
vector<int> atom1(dihedrals);
vector<int> atom2(dihedrals);
vector<int> atom3(dihedrals);
vector<int> atom4(dihedrals);
vector<float> k(dihedrals);
vector<float> phase(dihedrals);
vector<int> periodicity(dihedrals);
for (int i = 0; i < dihedrals; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
atom3[i] >>
atom4[i] >>
k[i] >>
phase[i] >>
periodicity[i];
}
gpuSetDihedralParameters(gpu, atom1, atom2, atom3, atom4, k, phase, periodicity);
return dihedrals;
}
else
{
cout << "Error opening dihedral parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
const vector<float>& k, const vector<float>& phase, const vector<int>& periodicity)
......@@ -443,68 +253,6 @@ void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const ve
psDihedralParameter->Upload();
}
extern "C"
int gpuReadRbDihedralParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[512];
int rb_dihedrals;
infile >> rb_dihedrals;
infile.getline(buff, 512);
vector<int> atom1(rb_dihedrals);
vector<int> atom2(rb_dihedrals);
vector<int> atom3(rb_dihedrals);
vector<int> atom4(rb_dihedrals);
vector<float> c0(rb_dihedrals);
vector<float> c1(rb_dihedrals);
vector<float> c2(rb_dihedrals);
vector<float> c3(rb_dihedrals);
vector<float> c4(rb_dihedrals);
vector<float> c5(rb_dihedrals);
gpu->sim.rb_dihedrals = rb_dihedrals;
CUDAStream<int4>* psRbDihedralID1 = new CUDAStream<int4>(rb_dihedrals, 1);
gpu->psRbDihedralID1 = psRbDihedralID1;
gpu->sim.pRbDihedralID1 = psRbDihedralID1->_pDevStream[0];
CUDAStream<int4>* psRbDihedralID2 = new CUDAStream<int4>(rb_dihedrals, 1);
gpu->psRbDihedralID2 = psRbDihedralID2;
gpu->sim.pRbDihedralID2 = psRbDihedralID2->_pDevStream[0];
CUDAStream<float4>* psRbDihedralParameter1 = new CUDAStream<float4>(rb_dihedrals, 1);
gpu->psRbDihedralParameter1 = psRbDihedralParameter1;
gpu->sim.pRbDihedralParameter1 = psRbDihedralParameter1->_pDevStream[0];
CUDAStream<float2>* psRbDihedralParameter2 = new CUDAStream<float2>(rb_dihedrals, 1);
gpu->psRbDihedralParameter2 = psRbDihedralParameter2;
gpu->sim.pRbDihedralParameter2 = psRbDihedralParameter2->_pDevStream[0];
for (int i = 0; i < rb_dihedrals; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
atom3[i] >>
atom4[i] >>
c0[i] >>
c1[i] >>
c2[i] >>
c3[i] >>
c4[i] >>
c5[i];
}
gpuSetRbDihedralParameters(gpu, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
return rb_dihedrals;
}
else
{
cout << "Error opening Ryckaert-Bellemans dihedral parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
const vector<float>& c0, const vector<float>& c1, const vector<float>& c2, const vector<float>& c3, const vector<float>& c4, const vector<float>& c5)
......@@ -566,56 +314,6 @@ void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const
psRbDihedralParameter2->Upload();
}
extern "C"
int gpuReadLJ14Parameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[1024];
float epsfac = 0.0f;
float fudge = 0.0f;
int LJ14s;
infile >> LJ14s;
infile.get(buff, 61);
// cout << buff << endl;
infile >> epsfac;
infile.get(buff, 8);
infile >> fudge;
infile.getline(buff, 512);
// cout << buff << endl;
vector<int> atom1(LJ14s);
vector<int> atom2(LJ14s);
vector<float> c6(LJ14s);
vector<float> c12(LJ14s);
vector<float> q1(LJ14s);
vector<float> q2(LJ14s);
for (int i = 0; i < LJ14s; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
c6[i] >>
c12[i] >>
q1[i] >>
q2[i];
}
gpuSetLJ14Parameters(gpu, epsfac, fudge, atom1, atom2, c6, c12, q1, q2);
return LJ14s;
}
else
{
cout << "Error opening Lennard-Jones 1-4 parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vector<int>& atom1, const vector<int>& atom2,
const vector<float>& c6, const vector<float>& c12, const vector<float>& q1, const vector<float>& q2)
......@@ -672,158 +370,6 @@ void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vecto
psLJ14Parameter->Upload();
}
extern "C"
float gpuGetAtomicRadius(gpuContext gpu, string s)
{
for (int i = 0; i < gpu->gAtomTypes; i++)
{
if (s == gpu->gpAtomTable[i].name)
{
return gpu->gpAtomTable[i].r;
}
}
return 0.0f;
}
extern "C"
unsigned char gpuGetAtomicSymbol(gpuContext gpu, string s)
{
for (int i = 0; i < gpu->gAtomTypes; i++)
{
if (s == gpu->gpAtomTable[i].name)
{
return gpu->gpAtomTable[i].symbol;
}
}
return ' ';
}
extern "C"
int gpuReadAtomicParameters(gpuContext gpu, char* fname)
{
gpu->gAtomTypes = 0;
if (gpu->gpAtomTable)
delete[] gpu->gpAtomTable;
// Read file once to count atom types
ifstream infile(fname);
if (!infile.fail())
{
char buff[1024];
int skips = 0;
bool skipflag = true;
while (infile.getline(buff, 512))
{
if (buff[0] == ' ')
{
skipflag = false;
gpu->gAtomTypes++;
}
else if (skipflag)
skips++;
}
infile.close();
gpu->gpAtomTable = new gpuAtomType[gpu->gAtomTypes];
ifstream infile1(fname);
for (int i = 0; i < skips; i++)
{
infile1.getline(buff, 512);
}
for (int i = 0; i < gpu->gAtomTypes; i++)
{
infile1 >> gpu->gpAtomTable[i].name >> gpu->gpAtomTable[i].r;
infile1.getline(buff, 512);
// Determine symbol
if (gpu->gpAtomTable[i].r < 1.3f)
gpu->gpAtomTable[i].symbol = 'H';
else if (gpu->gpAtomTable[i].r < 1.6f)
gpu->gpAtomTable[i].symbol = 'O';
else if (gpu->gpAtomTable[i].r < 1.7f)
gpu->gpAtomTable[i].symbol = 'N';
else
gpu->gpAtomTable[i].symbol = 'C';
#if (DUMP_PARAMETERS == 1)
cout << i << " " << gpu->gpAtomTable[i].name << " " << gpu->gpAtomTable[i].symbol << " " << gpu->gpAtomTable[i].r << endl;
#endif
}
return gpu->gAtomTypes;
}
else
{
cout << "Error opening atom parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
int gpuReadCoulombParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[1024];
unsigned int coulombs;
float fudge = 0.0f;
float epsfac = 1.0f;
infile >> coulombs;
infile.get(buff, 9);
infile >> epsfac;
infile.get(buff, 8);
infile >> fudge;
infile.getline(buff, 512);
vector<int> atom(coulombs);
vector<float> c6(coulombs);
vector<float> c12(coulombs);
vector<float> q(coulombs);
vector<float> radius(coulombs);
vector<float> scale(coulombs);
vector<char> symbol(coulombs);
vector<vector<int> > exclusions(coulombs);
unsigned int total_exclusions = 0;
for (unsigned int i = 0; i < coulombs; i++)
{
int junk, numExclusions;
char atype[512];
infile >>
junk >>
c6[i] >>
c12[i] >>
q[i] >>
atype >>
scale[i] >>
numExclusions;
radius[i] = gpuGetAtomicRadius(gpu, atype);
symbol[i] = gpuGetAtomicSymbol(gpu, atype);
for (int j = 0; j < numExclusions; j++)
{
int exclusion;
infile >> exclusion;
exclusions[i].push_back(exclusion);
}
}
cout << total_exclusions << " total exclusions.\n";
gpuSetCoulombParameters(gpu, epsfac, atom, c6, c12, q, symbol, exclusions, NO_CUTOFF);
gpuSetObcParameters(gpu, defaultInnerDielectric, defaultSolventDielectric, atom, radius, scale);
return coulombs;
}
else
{
cout << "Error opening Coulomb parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& atom, const vector<float>& c6, const vector<float>& c12, const vector<float>& q,
const vector<char>& symbol, const vector<vector<int> >& exclusions, CudaNonbondedMethod method)
......@@ -911,44 +457,6 @@ void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDie
gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;
}
extern "C"
int gpuReadShakeParameters(gpuContext gpu, char* fname)
{
ifstream infile(fname);
if (!infile.fail())
{
char buff[512];
int shake_constraints;
infile >> buff >> shake_constraints;
infile.getline(buff, 512);
vector<int> atom1(shake_constraints);
vector<int> atom2(shake_constraints);
vector<float> distance(shake_constraints);
vector<float> invMass1(shake_constraints);
vector<float> invMass2(shake_constraints);
for (int i = 0; i < shake_constraints; i++)
{
int junk;
infile >>
junk >>
atom1[i] >>
atom2[i] >>
distance[i] >>
invMass1[i] >>
invMass2[i];
}
gpuSetShakeParameters(gpu, atom1, atom2, distance, invMass1, invMass2, 1e-4f);
return gpu->sim.ShakeConstraints;
}
else
{
cout << "Error opening Shake parameter file " << fname << endl;
exit(-1);
}
return 0;
}
extern "C"
void gpuSetShakeParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& distance,
const vector<float>& invMass1, const vector<float>& invMass2, float tolerance)
......@@ -1230,45 +738,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
return 1;
}
extern "C"
void gpuReadCoordinates(gpuContext gpu, char* fname)
{
ifstream infile(fname);
gpu->natoms = 0;
char buff[512];
infile >> buff >> gpu->natoms;
infile.getline(buff, 511);
float totalMass = 0.0f;
gpuAllocateInitialBuffers(gpu);
for (int i = 0; i < gpu->natoms; i++)
{
int junk;
infile >> junk >>
gpu->psPosq4->_pSysStream[0][i].x >>
gpu->psPosq4->_pSysStream[0][i].y >>
gpu->psPosq4->_pSysStream[0][i].z >>
gpu->psPosq4->_pSysStream[0][i].w >>
gpu->psVelm4->_pSysStream[0][i].x >>
gpu->psVelm4->_pSysStream[0][i].y >>
gpu->psVelm4->_pSysStream[0][i].z >>
gpu->psVelm4->_pSysStream[0][i].w;
gpu->psxVector4->_pSysStream[0][i].x = 0.0f;
gpu->psxVector4->_pSysStream[0][i].y = 0.0f;
gpu->psxVector4->_pSysStream[0][i].z = 0.0f;
gpu->psxVector4->_pSysStream[0][i].w = 0.0f;
// Accumulate mass
totalMass += 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
}
gpu->sim.inverseTotalMass = 1.0f / totalMass;
gpu->psPosq4->Upload();
gpu->psVelm4->Upload();
gpu->psxVector4->Upload();
}
extern "C"
void gpuSetPositions(gpuContext gpu, const vector<float>& x, const vector<float>& y, const vector<float>& z)
{
......@@ -1341,37 +810,6 @@ bool gpuIsAvailable()
return (deviceCount > 0);
}
extern "C"
void* gpuInitFromFile(char* fname)
{
ifstream infile(fname);
int numAtoms = 0;
char buff[512];
infile >> buff >> numAtoms;
gpuContext gpu = (gpuContext) gpuInit(numAtoms);
vector<float> x(numAtoms), y(numAtoms), z(numAtoms), charge(numAtoms), vx(numAtoms), vy(numAtoms), vz(numAtoms), mass(numAtoms);
infile.getline(buff, 511);
float totalMass = 0.0f;
for (int i = 0; i < gpu->natoms; i++)
{
int junk;
infile >> junk >>
x[i] >>
y[i] >>
z[i] >>
charge[i] >>
vx[i] >>
vy[i] >>
vz[i] >>
mass[i];
mass[i] = 1.0f/mass[i];
}
gpuSetPositions(gpu, x, y, z);
gpuSetVelocities(gpu, vx, vy, vz);
gpuSetMass(gpu, mass);
return (void*)gpu;
}
extern "C"
void* gpuInit(int numAtoms)
{
......@@ -1979,7 +1417,6 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateCDLJObcGbsaForces1Sim(gpu);
SetCalculateLocalForcesSim(gpu);
SetCalculateObcGbsaBornSumSim(gpu);
SetCalculateObcGbsaForces1Sim(gpu);
SetCalculateObcGbsaForces2Sim(gpu);
SetCalculateAndersenThermostatSim(gpu);
SetForcesSim(gpu);
......@@ -1988,866 +1425,9 @@ int gpuSetConstants(gpuContext gpu)
SetBrownianUpdateSim(gpu);
SetSettleSim(gpu);
SetRandomSim(gpu);
if (gpu->sm_version >= SM_12)
{
SetCalculateObcGbsaForces1_12Sim(gpu);
}
return 1;
}
extern "C"
void gpuDumpCoordinates(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psVelm4->Download();
(void) printf( "\n\nCoordinates and velocities\n" );
for (int i = 0; i < gpu->natoms; i++)
{
printf("%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x,
gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z,
gpu->psPosq4->_pSysStream[0][i].w,
gpu->psVelm4->_pSysStream[0][i].x,
gpu->psVelm4->_pSysStream[0][i].y,
gpu->psVelm4->_pSysStream[0][i].z,
gpu->psVelm4->_pSysStream[0][i].w
);
}
}
bool ISNAN(float f)
{
return !(f == f);
}
extern "C"
bool gpuCheckData(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psVelm4->Download();
gpu->psForce4->Download();
gpu->psBornForce->Download();
int violations = 0;
for (int i = 0; i < gpu->natoms; i++)
{
if (ISNAN( gpu->psPosq4->_pSysStream[0][i].x) ||
ISNAN( gpu->psPosq4->_pSysStream[0][i].y) ||
ISNAN( gpu->psPosq4->_pSysStream[0][i].z) ||
ISNAN( gpu->psVelm4->_pSysStream[0][i].x) ||
ISNAN( gpu->psVelm4->_pSysStream[0][i].y) ||
ISNAN( gpu->psVelm4->_pSysStream[0][i].z) ||
ISNAN( gpu->psForce4->_pSysStream[0][i].x) ||
ISNAN( gpu->psForce4->_pSysStream[0][i].y) ||
ISNAN( gpu->psForce4->_pSysStream[0][i].z) ||
ISNAN( gpu->psBornForce->_pSysStream[0][i]))
{
printf("%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x,
gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z,
gpu->psVelm4->_pSysStream[0][i].x,
gpu->psVelm4->_pSysStream[0][i].y,
gpu->psVelm4->_pSysStream[0][i].z,
gpu->psForce4->_pSysStream[0][i].x,
gpu->psForce4->_pSysStream[0][i].y,
gpu->psForce4->_pSysStream[0][i].z,
gpu->psBornForce->_pSysStream[0][i]
);
violations++;
}
}
if (violations > 0)
{
printf("%d total violations\n", violations);
for (int i = 0; i < gpu->natoms; i++)
{
float dmin = 99999999.0f;
int closest = -9999;
float x = gpu->psPosq4->_pSysStream[0][i].x;
float y = gpu->psPosq4->_pSysStream[0][i].y;
float z = gpu->psPosq4->_pSysStream[0][i].z;
for (int j = 0; j < gpu->natoms; j++)
{
if (j != i)
{
float dx = gpu->psPosq4->_pSysStream[0][j].x - x;
float dy = gpu->psPosq4->_pSysStream[0][j].y - y;
float dz = gpu->psPosq4->_pSysStream[0][j].z - z;
float r = sqrt(dx * dx + dy * dy + dz * dz);
if (r < dmin)
{
dmin = r;
closest = j;
}
}
}
printf("Atom %4d: Closest neighbor is Atom %4d, %11.5e\n", i, closest, dmin);
}
gpuDumpAtomData(gpu);
kClearBornForces(gpu);
kClearForces(gpu);
kCPUCalculateLocalForces(gpu);
// Determine which forces have gone awry
kClearBornForces(gpu);
kClearForces(gpu);
kCalculateCDLJForces(gpu);
kReduceForces(gpu);
printf("Nonbond Forces\n");
gpuDumpForces(gpu);
kClearBornForces(gpu);
kClearForces(gpu);
kCalculateObcGbsaForces1(gpu);
kReduceObcGbsaBornForces(gpu);
kCalculateObcGbsaForces2(gpu);
kReduceForces(gpu);
printf("OBC Forces\n");
gpuDumpForces(gpu);
kClearBornForces(gpu);
kClearForces(gpu);
kCalculateLocalForces(gpu);
kReduceForces(gpu);
printf("Local Forces\n");
gpuDumpForces(gpu);
kClearBornForces(gpu);
kClearForces(gpu);
kReduceForces(gpu);
printf("Cleared Forces\n");
gpuDumpForces(gpu);
return false;
}
return true;
}
extern "C"
void kCPUCalculate14(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psForce4->Download();
// gpu->psLJ14ID->Download();
// gpu->psLJ14Parameter->Download();
for (int pos = 0; pos < (int) gpu->sim.LJ14s; pos++)
{
int4 atom = gpu->psLJ14ID->_pSysStream[0][pos];
float4 LJ14 = gpu->psLJ14Parameter->_pSysStream[0][pos];
float4 a1 = gpu->psPosq4->_pSysStream[0][atom.x];
float4 a2 = gpu->psPosq4->_pSysStream[0][atom.y];
float3 d;
d.x = a1.x - a2.x;
d.y = a1.y - a2.y;
d.z = a1.z - a2.z;
float r2 = d.x * d.x + d.y * d.y + d.z * d.z;
float inverseR = 1.0f / sqrt(r2);
float sig2 = inverseR * LJ14.y;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
dEdR += LJ14.z * inverseR;
dEdR *= inverseR * inverseR;
unsigned int offsetA = atom.x + atom.z * gpu->sim.stride;
unsigned int offsetB = atom.y + atom.w * gpu->sim.stride;
float4 forceA = gpu->psForce4->_pSysStream[0][offsetA];
float4 forceB = gpu->psForce4->_pSysStream[0][offsetB];
d.x *= dEdR;
d.y *= dEdR;
d.z *= dEdR;
forceA.x += d.x;
forceA.y += d.y;
forceA.z += d.z;
forceB.x -= d.x;
forceB.y -= d.y;
forceB.z -= d.z;
gpu->psForce4->_pSysStream[0][offsetA] = forceA;
gpu->psForce4->_pSysStream[0][offsetB] = forceB;
printf("%4d: %4d - %4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", pos, atom.x, atom.y, r2, dEdR, sig2, sig6, LJ14.x, LJ14.z);
}
}
extern "C"
void gpuDumpPrimeCoordinates(gpuContext gpu)
{
gpu->psPosqP4->Download();
for (int i = 0; i < gpu->natoms; i++)
{
printf("%4d: %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosqP4->_pSysStream[0][i].x,
gpu->psPosqP4->_pSysStream[0][i].y,
gpu->psPosqP4->_pSysStream[0][i].z,
gpu->psPosqP4->_pSysStream[0][i].w
);
}
}
extern "C"
void gpuDumpForces(gpuContext gpu)
{
gpu->psForce4->Download();
gpu->psBornForce->Download();
for (int i = 0; i < gpu->natoms; i++)
{
char buff[512];
sprintf(buff, "%4d: %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psForce4->_pSysStream[0][i].x,
gpu->psForce4->_pSysStream[0][i].y,
gpu->psForce4->_pSysStream[0][i].z,
gpu->psBornForce->_pSysStream[0][i]
);
// OutputDebugString(buff);
}
}
extern "C"
void gpuDumpAtomData(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psSigEps2->Download();
gpu->psBornRadii->Download();
gpu->psObcChain->Download();
for (int i = 0; i < gpu->natoms; i++)
{
char buff[512];
sprintf(buff, "%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x,
gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z,
gpu->psPosq4->_pSysStream[0][i].w,
gpu->psSigEps2->_pSysStream[0][i].x,
gpu->psSigEps2->_pSysStream[0][i].y,
gpu->psBornRadii->_pSysStream[0][i],
gpu->psObcChain->_pSysStream[0][i]
);
// OutputDebugString((LPCWSTR)buff);
}
}
extern "C"
void gpuSetup(void* pVoid)
{
gpuContext gpu = (gpuContext)pVoid;
// Read parameters
cout << gpuReadAtomicParameters(gpu, "Data/atomicradii.txt") << " atom types\n";
cout << gpuReadBondParameters(gpu, "Data/GromacsHarmonicBondParameter.txt") << " bond parameters.\n";
cout << gpuReadBondAngleParameters(gpu, "Data/GromacsAngleBondParameter.txt") << " bond angle parameters.\n";
cout << gpuReadDihedralParameters(gpu, "Data/GromacsProperDihedralParameter.txt") << " proper dihedral parameters.\n";
cout << gpuReadRbDihedralParameters(gpu, "Data/GromacsRbDihedralParameter.txt") << " Ryckaert-Bellemans dihedral parameters.\n";
cout << gpuReadLJ14Parameters(gpu, "Data/GromacsLJ14Parameter.txt") << " Lennard-Jones 1-4 parameters.\n";
cout << gpuReadCoulombParameters(gpu, "Data/GromacsLJCoulombParameter.txt") << " Coulomb parameters.\n";
cout << gpuReadShakeParameters(gpu, "Data/GromacsShakeParameters.txt") << " shake parameters.\n";
// Build thread block work list
gpuBuildThreadBlockWorkList(gpu);
// Build exclusion list
gpuBuildExclusionList(gpu);
// Create output buffers
gpuBuildOutputBuffers(gpu);
// Set constant blocks
gpuSetConstants(gpu);
// Initialize randoms
gpuInitializeRandoms(gpu);
// Initialize Born Radii;
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
kClearForces(gpu);
kClearBornForces(gpu);
return;
}
#define DOT3(v1, v2) (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z)
#define GETNORMEDDOTPRODUCT(v1, v2, dp) \
{ \
dp = DOT3(v1, v2); \
float norm1 = DOT3(v1, v1); \
float norm2 = DOT3(v2, v2); \
dp /= sqrt(norm1 * norm2); \
dp = min(dp, 1.0f); \
dp = max(dp, -1.0f); \
}
#define CROSS_PRODUCT(v1, v2, c) \
c.x = v1.y * v2.z - v1.z * v2.y; \
c.y = v1.z * v2.x - v1.x * v2.z; \
c.z = v1.x * v2.y - v1.y * v2.x;
#define GETPREFACTORSGIVENANGLECOSINE(cosine, param, dEdR) \
{ \
float angle = acos(cosine); \
float deltaIdeal = angle - (param.x * (3.14159265f / 180.0f)); \
dEdR = param.y * deltaIdeal; \
}
#define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \
{ \
float dp; \
GETNORMEDDOTPRODUCT(v1, v2, dp); \
angle = acos(dp); \
}
#define GETANGLECOSINEBETWEENTWOVECTORS(v1, v2, angle, cosine) \
{ \
GETNORMEDDOTPRODUCT(v1, v2, cosine); \
angle = acos(cosine); \
}
#define GETDIHEDRALANGLEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle) \
{ \
CROSS_PRODUCT(vector1, vector2, cp0); \
CROSS_PRODUCT(vector2, vector3, cp1); \
GETANGLEBETWEENTWOVECTORS(cp0, cp1, angle); \
float dp = DOT3(signVector, cp1); \
angle = (dp >= 0) ? angle : -angle; \
}
#define GETDIHEDRALANGLECOSINEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle, cosine) \
{ \
CROSS_PRODUCT(vector1, vector2, cp0); \
CROSS_PRODUCT(vector2, vector3, cp1); \
GETANGLECOSINEBETWEENTWOVECTORS(cp0, cp1, angle, cosine); \
float dp = DOT3(signVector, cp1); \
angle = (dp >= 0) ? angle : -angle; \
}
// Calculate Local forces on CPU
extern "C"
void kCPUCalculateLocalForces(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psForce4->Download();
gpu->psBondID->Download();
gpu->psBondParameter->Download();
gpu->psBondAngleID1->Download();
gpu->psBondAngleID2->Download();
gpu->psBondAngleParameter->Download();
gpu->psDihedralID1->Download();
gpu->psDihedralID2->Download();
gpu->psDihedralParameter->Download();
gpu->psRbDihedralID1->Download();
gpu->psRbDihedralID2->Download();
gpu->psRbDihedralParameter1->Download();
gpu->psRbDihedralParameter2->Download();
gpu->psLJ14ID->Download();
gpu->psLJ14Parameter->Download();
unsigned int pos = 0;
Vectors V;
Vectors* A = &V;
int violations = 0;
while (pos < gpu->sim.bond_offset)
{
if (pos < gpu->sim.bonds)
{
int4 atom = gpu->psBondID->_pSysStream[0][pos];
float4 atomA = gpu->psPosq4->_pSysStream[0][atom.x];
float4 atomB = gpu->psPosq4->_pSysStream[0][atom.y];
float2 bond = gpu->psBondParameter->_pSysStream[0][pos];
float dx = atomB.x - atomA.x;
float dy = atomB.y - atomA.y;
float dz = atomB.z - atomA.z;
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float deltaIdeal = r - bond.x;
float dEdR = bond.y * deltaIdeal;
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
if (fabs(deltaIdeal) > 1.0f)
{
printf("Bond %4d: %11.4f %11.4f %11.4f %11.4f %11.4f %11.4f\n", pos, dx, dy, dz, r, deltaIdeal, dEdR);
violations++;
}
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
unsigned int offsetA = atom.x + atom.z * gpu->sim.stride;
unsigned int offsetB = atom.y + atom.w * gpu->sim.stride;
float4 forceA = gpu->psForce4->_pSysStream[0][offsetA];
float4 forceB = gpu->psForce4->_pSysStream[0][offsetB];
forceA.x += dx;
forceA.y += dy;
forceA.z += dz;
forceB.x -= dx;
forceB.y -= dy;
forceB.z -= dz;
gpu->psForce4->_pSysStream[0][offsetA] = forceA;
gpu->psForce4->_pSysStream[0][offsetB] = forceB;
}
pos++;
}
#if 0
while (pos < gpu->sim.bond_angle_offset)
{
unsigned int pos1 = pos - gpu->sim.bond_offset;
if (pos1 < gpu->sim.bond_angles)
{
int4 atom1 = gpu->psBondAngleID1->_pSysStream[0][pos1];
float2 bond_angle = gpu->psBondAngleParameter->_pSysStream[0][pos1];
float4 a1 = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 a2 = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 a3 = gpu->psPosq4->_pSysStream[0][atom1.z];
A->v0.x = a2.x - a1.x;
A->v0.y = a2.y - a1.y;
A->v0.z = a2.z - a1.z;
A->v1.x = a2.x - a3.x;
A->v1.y = a2.y - a3.y;
A->v1.z = a2.z - a3.z;
float3 cp;
CROSS_PRODUCT(A->v0, A->v1, cp);
float rp = DOT3(cp, cp); //cx * cx + cy * cy + cz * cz;
rp = max(sqrt(rp), 1.0e-06f);
float r21 = DOT3(A->v0, A->v0); // dx1 * dx1 + dy1 * dy1 + dz1 * dz1;
float r23 = DOT3(A->v1, A->v1); // dx2 * dx2 + dy2 * dy2 + dz2 * dz2;
float dot = DOT3(A->v0, A->v1); // dx1 * dx2 + dy1 * dy2 + dz1 * dz2;
float cosine = dot / sqrt(r21 * r23);
float dEdR;
GETPREFACTORSGIVENANGLECOSINE(cosine, bond_angle, dEdR);
printf("Bond angle %4d %11.4f %11.4f\n", pos1, cosine, dEdR);
float termA = dEdR / (r21 * rp);
float termC = -dEdR / (r23 * rp);
float3 c21;
float3 c23;
CROSS_PRODUCT(A->v0, cp, c21);
CROSS_PRODUCT(A->v1, cp, c23);
c21.x *= termA;
c21.y *= termA;
c21.z *= termA;
c23.x *= termC;
c23.y *= termC;
c23.z *= termC;
int2 atom2 = gpu->psBondAngleID2->_pSysStream[0][pos1];
unsigned int offset = atom1.x + atom1.w * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
force.x += c21.x;
force.y += c21.y;
force.z += c21.z;
gpu->psForce4->_pSysStream[0][offset] = force;
offset = atom1.y + atom2.x * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x -= (c21.x + c23.x);
force.y -= (c21.y + c23.y);
force.z -= (c21.z + c23.z);
gpu->psForce4->_pSysStream[0][offset] = force;
offset = atom1.z + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += c23.x;
force.y += c23.y;
force.z += c23.z;
gpu->psForce4->_pSysStream[0][offset] = force;
}
pos++;
}
while (pos < gpu->sim.dihedral_offset)
{
unsigned int pos1 = pos - gpu->sim.bond_angle_offset;
if (pos1 < gpu->sim.dihedrals)
{
int4 atom1 = gpu->psDihedralID1->_pSysStream[0][pos1];
float4 atomA = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 atomB = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 atomC = gpu->psPosq4->_pSysStream[0][atom1.z];
float4 atomD = gpu->psPosq4->_pSysStream[0][atom1.w];
A->v0.x = atomA.x - atomB.x;
A->v0.y = atomA.y - atomB.y;
A->v0.z = atomA.z - atomB.z;
A->v1.x = atomC.x - atomB.x;
A->v1.y = atomC.y - atomB.y;
A->v1.z = atomC.z - atomB.z;
A->v2.x = atomC.x - atomD.x;
A->v2.y = atomC.y - atomD.y;
A->v2.z = atomC.z - atomD.z;
float3 cp0, cp1;
float dihedralAngle;
GETDIHEDRALANGLEBETWEENTHREEVECTORS(A->v0, A->v1, A->v2, A->v0, cp0, cp1, dihedralAngle);
float4 dihedral = gpu->psDihedralParameter->_pSysStream[0][pos1];
float deltaAngle = dihedral.z * dihedralAngle - (dihedral.y * 3.14159265f / 180.0f);
float sinDeltaAngle = sin(deltaAngle);
float dEdAngle = -dihedral.x * dihedral.z * sinDeltaAngle;
float normCross1 = DOT3(cp0, cp0);
float normBC = sqrt(DOT3(A->v1, A->v1));
float4 ff;
ff.x = (-dEdAngle * normBC) / normCross1;
float normCross2 = DOT3(cp1, cp1);
ff.w = (dEdAngle * normBC) / normCross2;
float dp = 1.0f / DOT3(A->v1, A->v1);
ff.y = DOT3(A->v0, A->v1) * dp;
ff.z = DOT3(A->v2, A->v1) * dp;
int4 atom2 = gpu->psDihedralID2->_pSysStream[0][pos1];
float3 internalF0;
float3 internalF3;
float3 s;
// printf("%4d: %9.4f %9.4f %9.4f %9.4f\n", pos1, ff.x, ff.y, ff.z, ff.w);
unsigned int offset = atom1.x + atom2.x * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
internalF0.x = ff.x * cp0.x;
force.x += internalF0.x;
internalF0.y = ff.x * cp0.y;
force.y += internalF0.y;
internalF0.z = ff.x * cp0.z;
force.z += internalF0.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 0: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.w + atom2.w * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
internalF3.x = ff.w * cp1.x;
force.x += internalF3.x;
internalF3.y = ff.w * cp1.y;
force.y += internalF3.y;
internalF3.z = ff.w * cp1.z;
force.z += internalF3.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 3: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
s.x = ff.y * internalF0.x - ff.z * internalF3.x;
s.y = ff.y * internalF0.y - ff.z * internalF3.y;
s.z = ff.y * internalF0.z - ff.z * internalF3.z;
offset = atom1.y + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF0.x + s.x;
force.y += -internalF0.y + s.y;
force.z += -internalF0.z + s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 1: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.z + atom2.z * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF3.x - s.x;
force.y += -internalF3.y - s.y;
force.z += -internalF3.z - s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 2: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
}
pos++;
}
while (pos < gpu->sim.rb_dihedral_offset)
{
unsigned int pos1 = pos - gpu->sim.dihedral_offset;
if (pos1 < gpu->sim.rb_dihedrals)
{
int4 atom1 = gpu->psRbDihedralID1->_pSysStream[0][pos1];
float4 atomA = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 atomB = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 atomC = gpu->psPosq4->_pSysStream[0][atom1.z];
float4 atomD = gpu->psPosq4->_pSysStream[0][atom1.w];
A->v0.x = atomA.x - atomB.x;
A->v0.y = atomA.y - atomB.y;
A->v0.z = atomA.z - atomB.z;
A->v1.x = atomC.x - atomB.x;
A->v1.y = atomC.y - atomB.y;
A->v1.z = atomC.z - atomB.z;
A->v2.x = atomC.x - atomD.x;
A->v2.y = atomC.y - atomD.y;
A->v2.z = atomC.z - atomD.z;
float3 cp0, cp1;
float dihedralAngle, cosPhi;
// printf("%4d - 0 : %9.4f %9.4f %9.4f\n", pos1, A->v0.x, A->v0.y, A->v0.z);
// printf("%4d - 1 : %9.4f %9.4f %9.4f\n", pos1, A->v1.x, A->v1.y, A->v1.z);
// printf("%4d - 2 : %9.4f %9.4f %9.4f\n", pos1, A->v2.x, A->v2.y, A->v2.z);
GETDIHEDRALANGLECOSINEBETWEENTHREEVECTORS(A->v0, A->v1, A->v2, A->v0, cp0, cp1, dihedralAngle, cosPhi);
if (dihedralAngle < 0.0f )
{
dihedralAngle += 3.14159265f;
}
else
{
dihedralAngle -= 3.14159265f;
}
cosPhi = -cosPhi;
// printf("%4d: %9.4f %9.4f\n", pos1, dihedralAngle, cosPhi);
float4 dihedral1 = gpu->psRbDihedralParameter1->_pSysStream[0][pos1];
float2 dihedral2 = gpu->psRbDihedralParameter2->_pSysStream[0][pos1];
float cosFactor = cosPhi;
float dEdAngle = -dihedral1.y;
// printf("%4d - 1: %9.4f %9.4f\n", pos1, dEdAngle, 1.0f);
dEdAngle -= 2.0f * dihedral1.z * cosFactor;
// printf("%4d - 2: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 3.0f * dihedral1.w * cosFactor;
// printf("%4d - 3: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 4.0f * dihedral2.x * cosFactor;
// printf("%4d - 4: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 5.0f * dihedral2.y * cosFactor;
// printf("%4d - 5: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
dEdAngle *= sin(dihedralAngle);
// printf("%4d - f: %9.4f\n", pos1, dEdAngle);
float normCross1 = DOT3(cp0, cp0);
float normBC = sqrt(DOT3(A->v1, A->v1));
float4 ff;
ff.x = (-dEdAngle * normBC) / normCross1;
float normCross2 = DOT3(cp1, cp1);
ff.w = (dEdAngle * normBC) / normCross2;
float dp = 1.0f / DOT3(A->v1, A->v1);
ff.y = DOT3(A->v0, A->v1) * dp;
ff.z = DOT3(A->v2, A->v1) * dp;
int4 atom2 = gpu->psRbDihedralID2->_pSysStream[0][pos1];
float3 internalF0;
float3 internalF3;
float3 s;
printf("RB Dihedral %4d: %9.4f %9.4f %9.4f %9.4f\n", pos1, ff.x, ff.y, ff.z, ff.w);
unsigned int offset = atom1.x + atom2.x * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
internalF0.x = ff.x * cp0.x;
force.x += internalF0.x;
internalF0.y = ff.x * cp0.y;
force.y += internalF0.y;
internalF0.z = ff.x * cp0.z;
force.z += internalF0.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 0: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.w + atom2.w * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
internalF3.x = ff.w * cp1.x;
force.x += internalF3.x;
internalF3.y = ff.w * cp1.y;
force.y += internalF3.y;
internalF3.z = ff.w * cp1.z;
force.z += internalF3.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 3: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
s.x = ff.y * internalF0.x - ff.z * internalF3.x;
s.y = ff.y * internalF0.y - ff.z * internalF3.y;
s.z = ff.y * internalF0.z - ff.z * internalF3.z;
offset = atom1.y + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF0.x + s.x;
force.y += -internalF0.y + s.y;
force.z += -internalF0.z + s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 1: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.z + atom2.z * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF3.x - s.x;
force.y += -internalF3.y - s.y;
force.z += -internalF3.z - s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
// printf("%4d - 2: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
}
pos++;
}
while (pos < gpu->sim.LJ14_offset)
{
unsigned int pos1 = pos - gpu->sim.rb_dihedral_offset;
if (pos1 < gpu->sim.LJ14s)
{
int4 atom = gpu->psLJ14ID->_pSysStream[0][pos1];
float4 LJ14 = gpu->psLJ14Parameter->_pSysStream[0][pos1];
float4 a1 = gpu->psPosq4->_pSysStream[0][atom.x];
float4 a2 = gpu->psPosq4->_pSysStream[0][atom.y];
float3 d;
d.x = a1.x - a2.x;
d.y = a1.y - a2.y;
d.z = a1.z - a2.z;
float r2 = DOT3(d, d);
float inverseR = 1.0f / sqrt(r2);
float sig2 = inverseR * LJ14.y;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
dEdR += LJ14.z * inverseR;
dEdR *= inverseR * inverseR;
unsigned int offsetA = atom.x + atom.z * gpu->sim.stride;
unsigned int offsetB = atom.y + atom.w * gpu->sim.stride;
float4 forceA = gpu->psForce4->_pSysStream[0][offsetA];
float4 forceB = gpu->psForce4->_pSysStream[0][offsetB];
d.x *= dEdR;
d.y *= dEdR;
d.z *= dEdR;
forceA.x += d.x;
forceA.y += d.y;
forceA.z += d.z;
forceB.x -= d.x;
forceB.y -= d.y;
forceB.z -= d.z;
printf("LJ14 %d: %11.4f %11.4f %11.4f\n", pos1, d.x, d.y, d.z);
gpu->psForce4->_pSysStream[0][offsetA] = forceA;
gpu->psForce4->_pSysStream[0][offsetB] = forceB;
}
pos++;
}
#endif
if (violations > 0)
{
gpuDumpCoordinates(gpu);
gpuDumpForces(gpu);
}
}
static FILE* getWriteToFilePtr( char* fname, int step )
{
std::stringstream fileName;
fileName << fname << "_";
fileName << step;
fileName << ".txt";
FILE* filePtr = fopen( fileName.str().c_str(), "w" );
if( filePtr == NULL ){
(void) fprintf( stderr, "Could not open file=<%s> for writitng.", fileName.str().c_str() );
exit(-1);
}
return filePtr;
}
extern "C" {
static void printValues( FILE* filePtr, int index, int numberOfValues, float* values )
{
int i;
(void) fprintf( filePtr, "%5d ", index );
for ( i = 0; i < numberOfValues; i++ ) {
(void) fprintf( filePtr, " %18.10e", values[i] );
}
(void) fprintf( filePtr, "\n" );
(void) fflush( filePtr );
}
}
extern "C"
void WriteArrayToFile1( gpuContext gpu, char* fname, int step, CUDAStream<float>* psPos, int numPrint )
{
int i;
static const int numberOfValues = 1;
FILE* filePtr = getWriteToFilePtr( fname, step );
float values[numberOfValues];
psPos->Download();
numPrint = (numPrint > 0 && (numPrint < gpu->natoms)) ? numPrint : gpu->natoms;
for ( i = 0; i < numPrint; i++ ) {
values[0] = psPos->_pSysStream[0][i];
printValues( filePtr, i, numberOfValues, values );
}
for ( i = gpu->natoms - numPrint; i < gpu->natoms; i++ ) {
values[0] = psPos->_pSysStream[0][i];
printValues( filePtr, i, numberOfValues, values );
}
(void) fclose( filePtr );
}
extern "C"
void WriteArrayToFile2( gpuContext gpu, char* fname, int step, CUDAStream<float2>* psPos, int numPrint )
{
int i;
static const int numberOfValues = 2;
FILE* filePtr = getWriteToFilePtr( fname, step );
float values[numberOfValues];
psPos->Download();
numPrint = (numPrint > 0 && (numPrint < gpu->natoms)) ? numPrint : gpu->natoms;
for ( i = 0; i < numPrint; i++ ) {
values[0] = psPos->_pSysStream[0][i].x;
values[1] = psPos->_pSysStream[0][i].y;
printValues( filePtr, i, numberOfValues, values );
}
for ( i = gpu->natoms - numPrint; i < gpu->natoms; i++ ) {
values[0] = psPos->_pSysStream[0][i].x;
values[1] = psPos->_pSysStream[0][i].y;
printValues( filePtr, i, numberOfValues, values );
}
(void) fclose( filePtr );
}
extern "C"
void WriteArrayToFile4( gpuContext gpu, char* fname, int step, CUDAStream<float4>* psPos, int numPrint )
{
int i;
static const int numberOfValues = 4;
FILE* filePtr = getWriteToFilePtr( fname, step );
float values[numberOfValues];
psPos->Download();
numPrint = (numPrint > 0 && (numPrint < gpu->natoms)) ? numPrint : gpu->natoms;
for ( i = 0; i < numPrint; i++ ) {
values[0] = psPos->_pSysStream[0][i].x;
values[1] = psPos->_pSysStream[0][i].y;
values[2] = psPos->_pSysStream[0][i].z;
values[3] = psPos->_pSysStream[0][i].w;
printValues( filePtr, i, numberOfValues, values );
}
for ( i = gpu->natoms - numPrint; i < gpu->natoms; i++ ) {
values[0] = psPos->_pSysStream[0][i].x;
values[1] = psPos->_pSysStream[0][i].y;
values[2] = psPos->_pSysStream[0][i].z;
values[3] = psPos->_pSysStream[0][i].w;
printValues( filePtr, i, numberOfValues, values );
}
(void) fclose( filePtr );
}
extern "C"
void gpuDumpObcInfo(gpuContext gpu)
{
gpu->psPosq4->Download();
gpu->psBornRadii->Download();
gpu->psObcData->Download();
gpu->psBornSum->Download();
printf( "\n\nObc Info xyzw Brad atomR scaledAtomR\n" );
for (int i = 0; i < gpu->natoms; i++)
{
printf("%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x,
gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z,
gpu->psPosq4->_pSysStream[0][i].w,
gpu->psBornRadii->_pSysStream[0][i],
gpu->psBornSum->_pSysStream[0][i],
gpu->psObcData->_pSysStream[0][i].x,
gpu->psObcData->_pSysStream[0][i].y
);
}
}
extern "C"
void gpuDumpObcLoop1(gpuContext gpu)
{
float compF;
gpu->psForce4->Download();
gpu->psBornRadii->Download();
gpu->psBornForce->Download();
gpu->psObcChain->Download();
gpu->psBornSum->Download();
printf( "\n\nObc F3 BrnR BrnF Chn\n" );
for (int i = 0; i < gpu->natoms; i++)
{
compF = gpu->psBornForce->_pSysStream[0][i]/(gpu->psBornRadii->_pSysStream[0][i]*gpu->psBornRadii->_pSysStream[0][i]*gpu->psObcChain->_pSysStream[0][i]);
printf("%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psForce4->_pSysStream[0][i].x,
gpu->psForce4->_pSysStream[0][i].y,
gpu->psForce4->_pSysStream[0][i].z,
// gpu->psForce4->_pSysStream[0][i].w,
gpu->psBornRadii->_pSysStream[0][i],
compF,
gpu->psBornForce->_pSysStream[0][i],
// gpu->psBornSum->_pSysStream[0][i],
gpu->psObcChain->_pSysStream[0][i]
);
}
}
static void tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds)
{
// Recursively tag atoms as belonging to a particular molecule.
......
......@@ -138,52 +138,25 @@ typedef struct _gpuContext *gpuContext;
extern "C"
bool gpuIsAvailable();
extern "C"
int gpuReadBondParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetBondParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& length, const std::vector<float>& k);
extern "C"
int gpuReadBondAngleParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetBondAngleParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
const std::vector<float>& angle, const std::vector<float>& k);
extern "C"
int gpuReadDihedralParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4,
const std::vector<float>& k, const std::vector<float>& phase, const std::vector<int>& periodicity);
extern "C"
int gpuReadRbDihedralParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetRbDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4,
const std::vector<float>& c0, const std::vector<float>& c1, const std::vector<float>& c2, const std::vector<float>& c3, const std::vector<float>& c4, const std::vector<float>& c5);
extern "C"
int gpuReadLJ14Parameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q1, const std::vector<float>& q2);
extern "C"
float gpuGetAtomicRadius(gpuContext gpu, std::string s);
extern "C"
unsigned char gpuGetAtomicSymbol(gpuContext gpu, std::string s);
extern "C"
int gpuReadAtomicParameters(gpuContext gpu, char* fname);
extern "C"
int gpuReadCoulombParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q,
const std::vector<char>& symbol, const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method);
......@@ -197,9 +170,6 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
extern "C"
void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius, const std::vector<float>& scale);
extern "C"
int gpuReadShakeParameters(gpuContext gpu, char* fname);
extern "C"
void gpuSetShakeParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& distance,
const std::vector<float>& invMass1, const std::vector<float>& invMass2, float tolerance);
......@@ -207,9 +177,6 @@ void gpuSetShakeParameters(gpuContext gpu, const std::vector<int>& atom1, const
extern "C"
int gpuAllocateInitialBuffers(gpuContext gpu);
extern "C"
void gpuReadCoordinates(gpuContext gpu, char* fname);
extern "C"
void gpuSetPositions(gpuContext gpu, const std::vector<float>& x, const std::vector<float>& y, const std::vector<float>& z);
......@@ -222,9 +189,6 @@ void gpuSetMass(gpuContext gpu, const std::vector<float>& mass);
extern "C"
void gpuInitializeRandoms(gpuContext gpu);
extern "C"
void* gpuInitFromFile(char* fname);
extern "C"
void* gpuInit(int numAtoms);
......@@ -255,48 +219,6 @@ void gpuBuildExclusionList(gpuContext gpu);
extern "C"
int gpuSetConstants(gpuContext gpu);
extern "C"
void gpuDumpCoordinates(gpuContext gpu);
extern "C"
void gpuDumpPrimeCoordinates(gpuContext gpu);
extern "C"
void gpuDumpForces(gpuContext gpu);
extern "C"
void gpuDumpAtomData(gpuContext gpu);
extern "C"
bool gpuCheckData(gpuContext gpu);
extern "C"
void gpuSetup(void* pVoid);
extern "C"
void kCPUCalculate14(gpuContext gpu);
extern "C"
void kCPUCalculateLocalForces(gpuContext gpu);
extern "C"
void WriteArrayToFile1( gpuContext gpu, char* fname, int step, CUDAStream<float>* psPos, int numPrint );
extern "C"
void WriteArrayToFile2( gpuContext gpu, char* fname, int step, CUDAStream<float2>* psPos, int numPrint );
extern "C"
void WriteArrayToFile3( gpuContext gpu, char* fname, int step, CUDAStream<float3>* psPos, int numPrint );
extern "C"
void WriteArrayToFile4( gpuContext gpu, char* fname, int step, CUDAStream<float4>* psPos, int numPrint );
extern "C"
void gpuDumpObcInfo(gpuContext gpu);
extern "C"
void gpuDumpObcLoop1(gpuContext gpu);
extern "C"
void gpuReorderAtoms(gpuContext gpu);
......
......@@ -152,18 +152,6 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
// printf("kReduceObcGbsaBornSum\n");
kReduceObcGbsaBornSum_kernel<<<gpu->sim.blocks, 384>>>();
gpu->bRecalculateBornRadii = false;
if( 0 ){
static int step = 0;
int numPrint = -1;
step++;
WriteArrayToFile1( gpu, "ObcGbsaBornBRad", step, gpu->psBornRadii, numPrint );
WriteArrayToFile1( gpu, "ObcGbsaBornSum", step, gpu->psBornSum, numPrint );
WriteArrayToFile2( gpu, "ObcGbsaObcData", step, gpu->psObcData, numPrint );
WriteArrayToFile4( gpu, "ObcGbsaBornPos", step, gpu->psPosq4, numPrint );
//gpuDumpCoordinates( gpu );
gpuDumpObcInfo( gpu );
}
LAUNCHERROR("kReduceObcGbsaBornSum");
}
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
#include "gputypes.h"
struct Atom {
float x;
float y;
float z;
float q;
float br;
float fx;
float fy;
float fz;
float fb;
};
__shared__ Atom sA[G8X_NONBOND_THREADS_PER_BLOCK];
__shared__ unsigned int sWorkUnit[G8X_NONBOND_WORKUNITS_PER_SM];
__shared__ unsigned int sNext[GRID];
static __constant__ cudaGmxSimulation cSim;
void SetCalculateObcGbsaForces1Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateObcGbsaForces1Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kReduceObcGbsaBornForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
float obcChain = cSim.pObcChain[pos];
float2 obcData = cSim.pObcData[pos];
float totalForce = 0.0f;
float* pFt = cSim.pBornForce + pos;
int i = cSim.nonbondOutputBuffers;
while (i >= 4)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
float f3 = *pFt;
pFt += cSim.stride;
float f4 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2 + f3 + f4;
i -= 4;
}
if (i >= 2)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2;
i -= 2;
}
if (i > 0)
{
totalForce += *pFt;
}
// __syncthreads();
//printf("%4d: %9.4f %9.4f %9.4f\n", pos, totalForce, bornRadius, obcChain);
//totalForce = 0.0f;
// if (bornRadius > 0.0f)
// {
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
//float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
// }
totalForce *= bornRadius * bornRadius * obcChain;
pFt = cSim.pBornForce + pos;
*pFt = totalForce;
pos += gridDim.x * blockDim.x;
}
}
__global__ void kReduceObcGbsaBornForces1_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
float obcChain = cSim.pObcChain[pos];
//float2 obcData = cSim.pObcData[pos];
float totalForce = 0.0f;
float* pFt = cSim.pBornForce + pos;
int i = cSim.nonbondOutputBuffers;
while (i >= 4)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
float f3 = *pFt;
pFt += cSim.stride;
float f4 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2 + f3 + f4;
i -= 4;
}
if (i >= 2)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2;
i -= 2;
}
if (i > 0)
{
totalForce += *pFt;
}
// __syncthreads();
//printf("%4d: %9.4f %9.4f %9.4f\n", pos, totalForce, bornRadius, obcChain);
//totalForce = 0.0f;
/*
// if (bornRadius > 0.0f)
// {
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
// }
*/
totalForce *= bornRadius * bornRadius * obcChain;
cSim.pBornForce[pos] = totalForce;
pos += gridDim.x * blockDim.x;
}
}
__global__ void kAceGbsa_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
float obcChain = cSim.pObcChain[pos];
float2 obcData = cSim.pObcData[pos];
float totalForce = cSim.pBornForce[pos];
//float totalForce = 0.0f;
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
/*
float ratio6 = (obcData.x + cSim.dielectricOffset) / bornRadius;
ratio6 = ratio6*ratio6;
ratio6 = ratio6*ratio6*ratio6;
*/
//float saTerm = 41.84f*cSim.surfaceAreaFactor * r * r * ratio6;
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
totalForce *= bornRadius * bornRadius * obcChain;
cSim.pBornForce[pos] = totalForce;
pos += gridDim.x * blockDim.x;
}
}
void kReduceObcGbsaBornForces(gpuContext gpu)
{
//printf("kReduceObcGbsaBornForces QQ\n");
kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
//kReduceObcGbsaBornForces1_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
//kAceGbsa_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
//printf("kReduceObcGbsaBornForces calling gpuDumpObcLoop1 QQ\n");
//gpuDumpObcLoop1(gpu);
}
__global__ void kCalculateObcGbsaForces1_kernel()
{
// Read queue of work blocks once so the remainder of
// kernel can run asynchronously
int pos = cSim.nbWorkUnitsPerBlock * blockIdx.x + min(blockIdx.x, cSim.nbWorkUnitsPerBlockRemainder);
int end = cSim.nbWorkUnitsPerBlock * (blockIdx.x + 1) + min((blockIdx.x + 1), cSim.nbWorkUnitsPerBlockRemainder);
if (threadIdx.x < end - pos)
{
sWorkUnit[threadIdx.x] = cSim.pWorkUnit[pos + threadIdx.x];
}
if (threadIdx.x < GRID)
{
sNext[threadIdx.x] = (threadIdx.x + 1) & (GRID - 1);
}
__syncthreads();
// Now change pos and end to reflect work queue just read
// into shared memory
end = end - pos;
pos = end - (threadIdx.x >> GRIDBITS) - 1;
while (pos >= 0)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = sWorkUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
x = (x >> 17) << GRIDBITS;
float4 apos; // Local atom x, y, z, q
float4 af; // Local atom fx, fy, fz, fb
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
int tj = tgx;
Atom* psA = &sA[tbx];
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned int i = x + tgx;
apos = cSim.pPosq[i];
float br = cSim.pBornRadii[i];
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].q = apos.w;
sA[threadIdx.x].br = br;
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
af.w = 0.0f;
apos.w *= cSim.preFactor;
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (apos.w * psA[j].q) / (denominator * denominator2);
float dGpol_dr = Gpol * (1.0f - 0.25f * expTerm);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dx *= dGpol_dr;
dy *= dGpol_dr;
dz *= dGpol_dr;
af.x -= dx;
af.y -= dy;
af.z -= dz;
af.w += dGpol_dalpha2_ij * psA[j].br;
}
// Write results
int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
int j = y + tgx;
unsigned int i = x + tgx;
float4 temp = cSim.pPosq[j];
float temp1 = cSim.pBornRadii[j];
apos = cSim.pPosq[i];
float br = cSim.pBornRadii[i];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].q = temp.w;
sA[threadIdx.x].br = temp1;
sA[threadIdx.x].fx = af.x = 0.0f;
sA[threadIdx.x].fy = af.y = 0.0f;
sA[threadIdx.x].fz = af.z = 0.0f;
sA[threadIdx.x].fb = af.w = 0.0f;
apos.w *= cSim.preFactor;
for (j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (apos.w * psA[tj].q) / (denominator * denominator2);
float dGpol_dr = Gpol * (1.0f - 0.25f * expTerm);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dx *= dGpol_dr;
dy *= dGpol_dr;
dz *= dGpol_dr;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
tj = sNext[tj];
}
// Write results
int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
af.x = sA[threadIdx.x].fx;
af.y = sA[threadIdx.x].fy;
af.z = sA[threadIdx.x].fz;
af.w = sA[threadIdx.x].fb;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
}
pos -= cSim.nonbond_workBlock;
}
}
__global__ extern void kCalculateObcGbsaForces1_12_kernel();
void kCalculateObcGbsaForces1(gpuContext gpu)
{
//printf("kCalculateObcGbsaForces1 version=%d sm_12=%d QQ\n", gpu->sm_version, SM_12);
if (gpu->sm_version < SM_12)
kCalculateObcGbsaForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
else
kCalculateObcGbsaForces1_12_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateObcGbsaForce1");
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
#include "gputypes.h"
struct Atom {
float x;
float y;
float z;
float q;
float br;
float fx;
float fy;
float fz;
float fb;
};
__shared__ Atom sA[GT2XX_NONBOND_THREADS_PER_BLOCK];
__shared__ unsigned int sWorkUnit[GT2XX_NONBOND_WORKUNITS_PER_SM];
__shared__ unsigned int sNext[GRID];
static __constant__ cudaGmxSimulation cSim;
void SetCalculateObcGbsaForces1_12Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateObcGbsaForces1_12Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
__global__ void kCalculateObcGbsaForces1_12_kernel()
{
// Read queue of work blocks once so the remainder of
// kernel can run asynchronously
int pos = cSim.nbWorkUnitsPerBlock * blockIdx.x + min(blockIdx.x, cSim.nbWorkUnitsPerBlockRemainder);
int end = cSim.nbWorkUnitsPerBlock * (blockIdx.x + 1) + min((blockIdx.x + 1), cSim.nbWorkUnitsPerBlockRemainder);
if (threadIdx.x < end - pos)
{
sWorkUnit[threadIdx.x] = cSim.pWorkUnit[pos + threadIdx.x];
}
if (threadIdx.x < GRID)
{
sNext[threadIdx.x] = (threadIdx.x + 1) & (GRID - 1);
}
__syncthreads();
// Now change pos and end to reflect work queue just read
// into shared memory
end = end - pos;
pos = end - (threadIdx.x >> GRIDBITS) - 1;
while (pos >= 0)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = sWorkUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
x = (x >> 17) << GRIDBITS;
float4 apos; // Local atom x, y, z, q
float4 af; // Local atom fx, fy, fz, fb
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
int tj = tgx;
Atom* psA = &sA[tbx];
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned int i = x + tgx;
apos = cSim.pPosq[i];
float br = cSim.pBornRadii[i];
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].q = apos.w;
sA[threadIdx.x].br = br;
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
af.w = 0.0f;
apos.w *= cSim.preFactor;
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (apos.w * psA[j].q) / (denominator * denominator2);
float dGpol_dr = Gpol * (1.0f - 0.25f * expTerm);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dx *= dGpol_dr;
dy *= dGpol_dr;
dz *= dGpol_dr;
af.x -= dx;
af.y -= dy;
af.z -= dz;
af.w += dGpol_dalpha2_ij * psA[j].br;
}
// Write results
int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
int j = y + tgx;
unsigned int i = x + tgx;
float4 temp = cSim.pPosq[j];
float temp1 = cSim.pBornRadii[j];
apos = cSim.pPosq[i];
float br = cSim.pBornRadii[i];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].q = temp.w;
sA[threadIdx.x].br = temp1;
sA[threadIdx.x].fx = af.x = 0.0f;
sA[threadIdx.x].fy = af.y = 0.0f;
sA[threadIdx.x].fz = af.z = 0.0f;
sA[threadIdx.x].fb = af.w = 0.0f;
apos.w *= cSim.preFactor;
for (j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (apos.w * psA[tj].q) / (denominator * denominator2);
float dGpol_dr = Gpol * (1.0f - 0.25f * expTerm);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dx *= dGpol_dr;
dy *= dGpol_dr;
dz *= dGpol_dr;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
tj = sNext[tj];
}
// Write results
int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
af.x = sA[threadIdx.x].fx;
af.y = sA[threadIdx.x].fy;
af.z = sA[threadIdx.x].fz;
af.w = sA[threadIdx.x].fb;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
}
pos -= cSim.nonbond_workBlock;
}
}
void kCalculateObcGbsaForces1_12(gpuContext gpu)
{
// printf("kCalculateObcGbsaForces1_12\n");
kCalculateObcGbsaForces1_12_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateObcGbsaForce1_12");
}
......@@ -259,3 +259,64 @@ void kReduceForces(gpuContext gpu)
LAUNCHERROR("kReduceForces");
}
__global__ void kReduceObcGbsaBornForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
float obcChain = cSim.pObcChain[pos];
float2 obcData = cSim.pObcData[pos];
float totalForce = 0.0f;
float* pFt = cSim.pBornForce + pos;
int i = cSim.nonbondOutputBuffers;
while (i >= 4)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
float f3 = *pFt;
pFt += cSim.stride;
float f4 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2 + f3 + f4;
i -= 4;
}
if (i >= 2)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2;
i -= 2;
}
if (i > 0)
{
totalForce += *pFt;
}
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
//float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
totalForce *= bornRadius * bornRadius * obcChain;
pFt = cSim.pBornForce + pos;
*pFt = totalForce;
pos += gridDim.x * blockDim.x;
}
}
void kReduceObcGbsaBornForces(gpuContext gpu)
{
//printf("kReduceObcGbsaBornForces\n");
kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
LAUNCHERROR("kReduceObcGbsaBornForces");
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment