Commit 109f6b25 (unverified), authored by Peter Eastman, committed by GitHub
Browse files

Very minor optimizations (#3602)

parent fb036060
...@@ -210,7 +210,7 @@ protected: ...@@ -210,7 +210,7 @@ protected:
bool includeEnergy; bool includeEnergy;
float inverseRcut6; float inverseRcut6;
float inverseRcut6Expterm; float inverseRcut6Expterm;
std::atomic<int> atomicCounter; std::atomic<int> atomicCounter, atomicCounter2;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
static const int NUM_TABLE_POINTS; static const int NUM_TABLE_POINTS;
......
...@@ -92,11 +92,8 @@ public: ...@@ -92,11 +92,8 @@ public:
voxelSizeZ = (maxz-minz)/nz; voxelSizeZ = (maxz-minz)/nz;
} }
bins.resize(ny); bins.resize(ny);
for (int i = 0; i < ny; i++) { for (int i = 0; i < ny; i++)
bins[i].resize(nz); bins[i].resize(nz);
for (int j = 0; j < nz; j++)
bins[i][j].resize(0);
}
} }
/** /**
......
...@@ -391,20 +391,13 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -391,20 +391,13 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
threadEnergy.resize(threads.getNumThreads()); threadEnergy.resize(threads.getNumThreads());
atomicCounter = 0; atomicCounter = 0;
atomicCounter2 = 0;
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); }); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions.
if (ewald || pme) {
atomicCounter = 0;
threads.resumeThreads();
threads.waitForThreads();
}
// Combine the energies from all the threads. // Combine the energies from all the threads.
if (totalEnergy != NULL) { if (totalEnergy != NULL) {
...@@ -436,10 +429,9 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -436,10 +429,9 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum. // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
threads.syncThreads();
const int groupSize = max(1, numberOfAtoms/(10*numThreads)); const int groupSize = max(1, numberOfAtoms/(10*numThreads));
while (true) { while (true) {
int start = atomicCounter.fetch_add(groupSize); int start = atomicCounter2.fetch_add(groupSize);
if (start >= numberOfAtoms) if (start >= numberOfAtoms)
break; break;
int end = min(start+groupSize, numberOfAtoms); int end = min(start+groupSize, numberOfAtoms);
......
...@@ -1024,7 +1024,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1024,7 +1024,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2"; replacements["CHARGE2"] = prefix+"charge2";
} }
if (hasCoulomb) if (hasCoulomb && !usePosqCharges)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer())); cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) { if (hasLJ) {
......
...@@ -968,7 +968,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -968,7 +968,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2"; replacements["CHARGE2"] = prefix+"charge2";
} }
if (hasCoulomb) if (hasCoulomb && !usePosqCharges)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDeviceBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDeviceBuffer()));
sigmaEpsilon.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) { if (hasLJ) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment