Commit 79e2fb0e authored by peastman
Browse files

Merge pull request #167 from peastman/cpu

Created optimized CPU platform
parents eba7ac01 2943d628
...@@ -77,7 +77,7 @@ int ...@@ -77,7 +77,7 @@ int
pme_exec(pme_t pme, pme_exec(pme_t pme,
std::vector<OpenMM::RealVec>& atomCoordinates, std::vector<OpenMM::RealVec>& atomCoordinates,
std::vector<OpenMM::RealVec>& forces, std::vector<OpenMM::RealVec>& forces,
RealOpenMM ** atomParameters, std::vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy, RealOpenMM * energy,
RealOpenMM pme_virial[3][3]); RealOpenMM pme_virial[3][3]);
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#ifndef __ReferencePairIxn_H__ #ifndef __ReferencePairIxn_H__
#define __ReferencePairIxn_H__ #define __ReferencePairIxn_H__
#include "RealVec.h"
#include "openmm/internal/windowsExport.h" #include "openmm/internal/windowsExport.h"
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "ReferenceLJCoulombIxn.h" #include "ReferenceLJCoulombIxn.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "PME.h" #include "ReferencePME.h"
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -233,7 +233,10 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -233,7 +233,10 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
pme_init(&pmedata,alphaEwald,numberOfAtoms,meshDim,5,1); pme_init(&pmedata,alphaEwald,numberOfAtoms,meshDim,5,1);
pme_exec(pmedata,atomCoordinates,forces,atomParameters,periodicBoxSize,&recipEnergy,virial); vector<RealOpenMM> charges(numberOfAtoms);
for (int i = 0; i < numberOfAtoms; i++)
charges[i] = atomParameters[i][QIndex];
pme_exec(pmedata,atomCoordinates,forces,charges,periodicBoxSize,&recipEnergy,virial);
if( totalEnergy ) if( totalEnergy )
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
......
...@@ -44,9 +44,7 @@ void OPENMM_EXPORT computeNeighborListNaive( ...@@ -44,9 +44,7 @@ void OPENMM_EXPORT computeNeighborListNaive(
bool usePeriodic, bool usePeriodic,
double maxDistance, double maxDistance,
double minDistance, double minDistance,
bool reportSymmetricPairs bool reportSymmetricPairs) {
)
{
neighborList.clear(); neighborList.clear();
double maxDistanceSquared = maxDistance * maxDistance; double maxDistanceSquared = maxDistance * maxDistance;
...@@ -102,15 +100,19 @@ public: ...@@ -102,15 +100,19 @@ public:
nx = (int) floor(periodicBoxSize[0]/voxelSizeX+0.5); nx = (int) floor(periodicBoxSize[0]/voxelSizeX+0.5);
ny = (int) floor(periodicBoxSize[1]/voxelSizeY+0.5); ny = (int) floor(periodicBoxSize[1]/voxelSizeY+0.5);
nz = (int) floor(periodicBoxSize[2]/voxelSizeZ+0.5); nz = (int) floor(periodicBoxSize[2]/voxelSizeZ+0.5);
voxelSizeX = periodicBoxSize[0]/nx;
voxelSizeY = periodicBoxSize[1]/ny;
voxelSizeZ = periodicBoxSize[2]/nz;
} }
} }
void insert(const AtomIndex& item, const RealVec& location) void insert(const AtomIndex& item, const RealVec& location)
{ {
VoxelIndex voxelIndex = getVoxelIndex(location); VoxelIndex voxelIndex = getVoxelIndex(location);
if ( voxelMap.find(voxelIndex) == voxelMap.end() ) voxelMap[voxelIndex] = Voxel(); if (voxelMap.find(voxelIndex) == voxelMap.end())
voxelMap[voxelIndex] = Voxel();
Voxel& voxel = voxelMap.find(voxelIndex)->second; Voxel& voxel = voxelMap.find(voxelIndex)->second;
voxel.push_back( VoxelItem(&location, item) ); voxel.push_back(VoxelItem(&location, item));
} }
...@@ -180,8 +182,9 @@ public: ...@@ -180,8 +182,9 @@ public:
voxelIndex.y = (y+ny)%ny; voxelIndex.y = (y+ny)%ny;
voxelIndex.z = (z+nz)%nz; voxelIndex.z = (z+nz)%nz;
} }
if (voxelMap.find(voxelIndex) == voxelMap.end()) continue; // no such voxel; skip const map<VoxelIndex, Voxel>::const_iterator voxelEntry = voxelMap.find(voxelIndex);
const Voxel& voxel = voxelMap.find(voxelIndex)->second; if (voxelEntry == voxelMap.end()) continue; // no such voxel; skip
const Voxel& voxel = voxelEntry->second;
for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter) for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter)
{ {
const AtomIndex atomJ = itemIter->second; const AtomIndex atomJ = itemIter->second;
...@@ -190,13 +193,13 @@ public: ...@@ -190,13 +193,13 @@ public:
// Ignore self hits // Ignore self hits
if (atomI == atomJ) continue; if (atomI == atomJ) continue;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end()) continue;
double dSquared = compPairDistanceSquared(locationI, locationJ, periodicBoxSize, usePeriodic); double dSquared = compPairDistanceSquared(locationI, locationJ, periodicBoxSize, usePeriodic);
if (dSquared > maxDistanceSquared) continue; if (dSquared > maxDistanceSquared) continue;
if (dSquared < minDistanceSquared) continue; if (dSquared < minDistanceSquared) continue;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end()) continue;
neighbors.push_back( AtomPair(atomI, atomJ) ); neighbors.push_back( AtomPair(atomI, atomJ) );
if (reportSymmetricPairs) if (reportSymmetricPairs)
neighbors.push_back( AtomPair(atomJ, atomI) ); neighbors.push_back( AtomPair(atomJ, atomI) );
...@@ -234,9 +237,9 @@ void OPENMM_EXPORT computeNeighborListVoxelHash( ...@@ -234,9 +237,9 @@ void OPENMM_EXPORT computeNeighborListVoxelHash(
if (!usePeriodic) if (!usePeriodic)
edgeSizeX = edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed edgeSizeX = edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed
else { else {
edgeSizeX = periodicBoxSize[0]/floor(periodicBoxSize[0]/maxDistance); edgeSizeX = 0.5*periodicBoxSize[0]/floor(periodicBoxSize[0]/maxDistance);
edgeSizeY = periodicBoxSize[1]/floor(periodicBoxSize[1]/maxDistance); edgeSizeY = 0.5*periodicBoxSize[1]/floor(periodicBoxSize[1]/maxDistance);
edgeSizeZ = periodicBoxSize[2]/floor(periodicBoxSize[2]/maxDistance); edgeSizeZ = 0.5*periodicBoxSize[2]/floor(periodicBoxSize[2]/maxDistance);
} }
VoxelHash voxelHash(edgeSizeX, edgeSizeY, edgeSizeZ, periodicBoxSize, usePeriodic); VoxelHash voxelHash(edgeSizeX, edgeSizeY, edgeSizeZ, periodicBoxSize, usePeriodic);
for (AtomIndex atomJ = 0; atomJ < (AtomIndex) nAtoms; ++atomJ) // use "j", because j > i for pairs for (AtomIndex atomJ = 0; atomJ < (AtomIndex) nAtoms; ++atomJ) // use "j", because j > i for pairs
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "PME.h" #include "ReferencePME.h"
#include "fftpack.h" #include "fftpack.h"
using std::vector; using std::vector;
...@@ -317,10 +317,8 @@ pme_update_bsplines(pme_t pme) ...@@ -317,10 +317,8 @@ pme_update_bsplines(pme_t pme)
static void static void
pme_grid_spread_charge(pme_t pme, pme_grid_spread_charge(pme_t pme, vector<RealOpenMM>& charges)
RealOpenMM ** atomParameters)
{ {
static const int QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
int order; int order;
int i; int i;
int ix,iy,iz; int ix,iy,iz;
...@@ -342,7 +340,7 @@ pme_grid_spread_charge(pme_t pme, ...@@ -342,7 +340,7 @@ pme_grid_spread_charge(pme_t pme,
for(i=0;i<pme->natoms;i++) for(i=0;i<pme->natoms;i++)
{ {
q = atomParameters[i][QIndex]; q = charges[i];
/* Grid index for the actual atom position */ /* Grid index for the actual atom position */
x0index = pme->particleindex[i][0]; x0index = pme->particleindex[i][0];
...@@ -523,10 +521,9 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -523,10 +521,9 @@ pme_reciprocal_convolution(pme_t pme,
static void static void
pme_grid_interpolate_force(pme_t pme, pme_grid_interpolate_force(pme_t pme,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
RealOpenMM ** atomParameters, vector<RealOpenMM>& charges,
vector<RealVec>& forces) vector<RealVec>& forces)
{ {
static const int QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
int i; int i;
int ix,iy,iz; int ix,iy,iz;
int x0index,y0index,z0index; int x0index,y0index,z0index;
...@@ -558,7 +555,7 @@ pme_grid_interpolate_force(pme_t pme, ...@@ -558,7 +555,7 @@ pme_grid_interpolate_force(pme_t pme,
{ {
fx = fy = fz = 0; fx = fy = fz = 0;
q = atomParameters[i][QIndex]; q = charges[i];
/* Grid index for the actual atom position */ /* Grid index for the actual atom position */
x0index = pme->particleindex[i][0]; x0index = pme->particleindex[i][0];
...@@ -671,7 +668,7 @@ pme_init(pme_t * ppme, ...@@ -671,7 +668,7 @@ pme_init(pme_t * ppme,
int pme_exec(pme_t pme, int pme_exec(pme_t pme,
vector<RealVec>& atomCoordinates, vector<RealVec>& atomCoordinates,
vector<RealVec>& forces, vector<RealVec>& forces,
RealOpenMM ** atomParameters, vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy, RealOpenMM * energy,
RealOpenMM pme_virial[3][3]) RealOpenMM pme_virial[3][3])
...@@ -692,7 +689,7 @@ int pme_exec(pme_t pme, ...@@ -692,7 +689,7 @@ int pme_exec(pme_t pme,
pme_update_bsplines(pme); pme_update_bsplines(pme);
/* Spread the charges on grid (using newly calculated bsplines in the pme structure) */ /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
pme_grid_spread_charge(pme,atomParameters); pme_grid_spread_charge(pme, charges);
/* do 3d-fft */ /* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid); fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
...@@ -704,7 +701,7 @@ int pme_exec(pme_t pme, ...@@ -704,7 +701,7 @@ int pme_exec(pme_t pme,
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid); fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
/* Get the particle forces from the grid and bsplines in the pme structure */ /* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,periodicBoxSize,atomParameters,forces); pme_grid_interpolate_force(pme,periodicBoxSize,charges,forces);
return 0; return 0;
} }
......
...@@ -86,7 +86,7 @@ void verifyNeighborList(NeighborList& list, int numParticles, vector<RealVec>& p ...@@ -86,7 +86,7 @@ void verifyNeighborList(NeighborList& list, int numParticles, vector<RealVec>& p
for (int j = i+1; j < numParticles; j++) for (int j = i+1; j < numParticles; j++)
if (distance2(positions[i], positions[j], periodicBoxSize) <= cutoff*cutoff) if (distance2(positions[i], positions[j], periodicBoxSize) <= cutoff*cutoff)
count++; count++;
ASSERT(count == list.size()); ASSERT_EQUAL(count, list.size());
} }
void testPeriodic() { void testPeriodic() {
...@@ -112,16 +112,15 @@ void testPeriodic() { ...@@ -112,16 +112,15 @@ void testPeriodic() {
int main() int main()
{ {
try { try {
testNeighborList(); testNeighborList();
testPeriodic(); testPeriodic();
}
cout << "Test Passed" << endl; catch(const exception& e) {
return 0; cout << "exception: " << e.what() << endl;
}
catch (...) {
cerr << "*** ERROR: Test Failed ***" << endl;
return 1; return 1;
} }
cout << "Done" << endl;
return 0;
} }
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#endif #endif
#include "CpuPmeKernels.h" #include "CpuPmeKernels.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "openmm/internal/hardware.h"
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <smmintrin.h> #include <smmintrin.h>
...@@ -48,78 +49,6 @@ int CpuCalcPmeReciprocalForceKernel::numThreads = 0; ...@@ -48,78 +49,6 @@ int CpuCalcPmeReciprocalForceKernel::numThreads = 0;
#define EXTRACT_FLOAT(v, element) _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, element))) #define EXTRACT_FLOAT(v, element) _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, element)))
// Define function to get the number of processors.

// Windows is detected with the compiler-predefined _WIN32 macro (the same macro
// the cpuid helper in this file tests); the legacy WIN32 spelling is still
// honored so builds that define only that macro keep working.
#if defined(__APPLE__)
#include <sys/sysctl.h>
#include <dlfcn.h>
#elif defined(_WIN32) || defined(WIN32)
#include <windows.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif

/**
 * Get the number of logical processors available on this machine.
 *
 * The value is queried from the operating system: sysctlbyname("hw.logicalcpu")
 * on Apple platforms, GetSystemInfo() on Windows, and
 * sysconf(_SC_NPROCESSORS_ONLN) everywhere else.
 *
 * @return the processor count reported by the OS, or 1 if the query fails or
 *         reports a non-positive value.  The result is therefore always >= 1.
 */
static int getNumProcessors() {
#if defined(__APPLE__)
    int ncpu = 0;
    // Pass the real size of the output buffer rather than a hard-coded 4.
    size_t len = sizeof(ncpu);
    if (sysctlbyname("hw.logicalcpu", &ncpu, &len, NULL, 0) == 0 && ncpu > 0)
        return ncpu;
    return 1;
#elif defined(_WIN32) || defined(WIN32)
    SYSTEM_INFO siSysInfo;
    GetSystemInfo(&siSysInfo);
    const int ncpu = (int) siSysInfo.dwNumberOfProcessors;
    return (ncpu < 1 ? 1 : ncpu);
#else
    // sysconf() returns -1 on error; treat any non-positive answer as "unknown"
    // so callers never see a zero or negative processor count.
    const long nProcessorsOnline = sysconf(_SC_NPROCESSORS_ONLN);
    if (nProcessorsOnline < 1)
        return 1;
    return (int) nProcessorsOnline;
#endif
}
// Define a function to check the CPU's capabilities.
//
// cpuid(cpuInfo, infoType) runs the x86 "cpuid" instruction with infoType
// loaded into EAX and stores the resulting EAX, EBX, ECX and EDX register
// values into cpuInfo[0], cpuInfo[1], cpuInfo[2] and cpuInfo[3] respectively.
//
// NOTE(review): only EAX is supplied as an input; ECX is not initialized before
// the instruction runs.  Presumably callers only query leaves that do not take
// a sub-leaf index in ECX -- confirm against the call sites.
#ifdef _WIN32
// On Windows, forward to __cpuid (presumably the compiler intrinsic with the
// same (int[4], int) argument order -- confirm the required header is included).
#define cpuid __cpuid
#else
static void cpuid(int cpuInfo[4], int infoType){
#ifdef __LP64__
// 64-bit build: the four output registers are bound directly to cpuInfo[0..3].
__asm__ __volatile__ (
"cpuid":
"=a" (cpuInfo[0]),
"=b" (cpuInfo[1]),
"=c" (cpuInfo[2]),
"=d" (cpuInfo[3]) :
"a" (infoType)
);
#else
// Non-LP64 build: EBX is saved and restored around the instruction, and its
// value is copied out through a general register operand (%1) instead of being
// named as an output.  Presumably this is because EBX may be reserved (e.g. as
// the PIC register) on 32-bit x86 -- TODO confirm.
__asm__ __volatile__ (
"pushl %%ebx\n"
"cpuid\n"
"movl %%ebx, %1\n"
"popl %%ebx\n" :
"=a" (cpuInfo[0]),
"=r" (cpuInfo[1]),
"=c" (cpuInfo[2]),
"=d" (cpuInfo[3]) :
"a" (infoType)
);
#endif
}
#endif
static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) { static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3 periodicBoxSize) {
float temp[4]; float temp[4];
__m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]); __m128 boxSize = _mm_set_ps(0, (float) periodicBoxSize[2], (float) periodicBoxSize[1], (float) periodicBoxSize[0]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment