"ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "7132e1d3f68a841556b09fa5dd15d489daa1094b"
Commit 59854c5e authored by leeping's avatar leeping
Browse files

Merge branch 'master' of github.com:SimTk/openmm

parents 8167c79b 8e11ed9c
...@@ -296,9 +296,9 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS) ...@@ -296,9 +296,9 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
# On Linux need to link to libdl # On Linux need to link to libdl
FIND_LIBRARY(DL_LIBRARY dl) FIND_LIBRARY(DL_LIBRARY dl)
IF(DL_LIBRARY) IF(DL_LIBRARY)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${DL_LIBRARY}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${DL_LIBRARY} ${PTHREADS_LIB})
IF(OPENMM_BUILD_STATIC_LIB) IF(OPENMM_BUILD_STATIC_LIB)
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${DL_LIBRARY}) TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${DL_LIBRARY} ${PTHREADS_LIB})
ENDIF(OPENMM_BUILD_STATIC_LIB) ENDIF(OPENMM_BUILD_STATIC_LIB)
ENDIF(DL_LIBRARY) ENDIF(DL_LIBRARY)
IF(WIN32) IF(WIN32)
......
#ifndef OPENMM_THREAD_POOL_H_
#define OPENMM_THREAD_POOL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "windowsExport.h"
#include <pthread.h>
#include <vector>
namespace OpenMM {
/**
* A ThreadPool creates a set of worker threads that can be used to execute tasks in parallel.
* After creating a ThreadPool, call execute() to start a task running then waitForThreads()
* to block until all threads have finished. You also can synchronize the threads in the middle
* of the task by having them call syncThreads(). In this case, the parent thread should call
* waitForThreads() an additional time; each call waits until all worker threads have reached the
* next syncThreads(), and the final call waits until they exit from the Task's execute() method.
* After calling waitForThreads() to block at a synchronization point, the parent thread should
* call resumeThreads() to instruct the worker threads to resume.
*/
class OPENMM_EXPORT ThreadPool {
public:
class Task;
class ThreadData;
ThreadPool();
~ThreadPool();
/**
* Get the number of worker threads in the pool.
*/
int getNumThreads() const;
/**
* Execute a Task in parallel on the worker threads.
*/
void execute(Task& task);
/**
* This is called by the worker threads to block until all threads have reached the same point
* and the master thread instructs them to continue by calling resumeThreads().
*/
void syncThreads();
/**
* This is called by the master thread to wait until all threads have completed the Task. Alternatively,
* if the threads call syncThreads(), this blocks until all threads have reached the synchronization point.
*/
void waitForThreads();
/**
* Instruct the threads to resume running after blocking at a synchronization point.
*/
void resumeThreads();
private:
bool isDeleted;
int numThreads, waitCount;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
};
/**
* This defines a task that can be executed in parallel by the worker threads.
*/
class OPENMM_EXPORT ThreadPool::Task {
public:
/**
* Execute the task on each thread.
*
* @param pool the ThreadPool being used to execute the task
* @param threadIndex the index of the thread invoking this method
*/
virtual void execute(ThreadPool& pool, int threadIndex) = 0;
};
} // namespace OpenMM
#endif // OPENMM_THREAD_POOL_H_
...@@ -212,6 +212,10 @@ static inline float dot4(fvec4 v1, fvec4 v2) { ...@@ -212,6 +212,10 @@ static inline float dot4(fvec4 v1, fvec4 v2) {
return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1)); return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
} }
static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
_MM_TRANSPOSE4_PS(v1, v2, v3, v4);
}
// Functions that operate on ivec4s. // Functions that operate on ivec4s.
static inline ivec4 min(ivec4 v1, ivec4 v2) { static inline ivec4 min(ivec4 v1, ivec4 v2) {
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/hardware.h"
using namespace std;
namespace OpenMM {
class ThreadPool::ThreadData {
public:
ThreadData(ThreadPool& owner, int index) : owner(owner), index(index), isDeleted(false) {
}
ThreadPool& owner;
int index;
bool isDeleted;
Task* currentTask;
};
static void* threadBody(void* args) {
ThreadPool::ThreadData& data = *reinterpret_cast<ThreadPool::ThreadData*>(args);
while (true) {
// Wait for the signal to start running.
data.owner.syncThreads();
if (data.isDeleted)
break;
data.currentTask->execute(data.owner, data.index);
}
delete &data;
return 0;
}
ThreadPool::ThreadPool() {
numThreads = getNumProcessors();
pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL);
pthread_mutex_init(&lock, NULL);
thread.resize(numThreads);
pthread_mutex_lock(&lock);
waitCount = 0;
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(*this, i);
data->isDeleted = false;
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
}
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
}
ThreadPool::~ThreadPool() {
for (int i = 0; i < (int) threadData.size(); i++)
threadData[i]->isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
pthread_join(thread[i], NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
}
int ThreadPool::getNumThreads() const {
return numThreads;
}
void ThreadPool::execute(Task& task) {
for (int i = 0; i < (int) threadData.size(); i++)
threadData[i]->currentTask = &task;
resumeThreads();
}
void ThreadPool::syncThreads() {
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
}
void ThreadPool::waitForThreads() {
pthread_mutex_lock(&lock);
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
}
void ThreadPool::resumeThreads() {
pthread_mutex_lock(&lock);
waitCount = 0;
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
}
} // namespace OpenMM
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
namespace OpenMM { namespace OpenMM {
...@@ -90,6 +91,7 @@ private: ...@@ -90,6 +91,7 @@ private:
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNeighborList neighborList; CpuNeighborList neighborList;
CpuNonbondedForce nonbonded; CpuNonbondedForce nonbonded;
ThreadPool threads;
Kernel optimizedPme; Kernel optimizedPme;
}; };
......
#ifndef OPENMM_CPU_NEIGHBORLIST_H_ #ifndef OPENMM_CPU_NEIGHBORLIST_H_
#define OPENMM_CPU_NEIGHBORLIST_H_ #define OPENMM_CPU_NEIGHBORLIST_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "windowsExportCpu.h" #include "windowsExportCpu.h"
#include <pthread.h> #include "openmm/internal/ThreadPool.h"
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -11,13 +42,12 @@ namespace OpenMM { ...@@ -11,13 +42,12 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList { class OPENMM_EXPORT_CPU CpuNeighborList {
public: public:
class ThreadData; class ThreadTask;
class Voxels; class Voxels;
static const int BlockSize; static const int BlockSize;
CpuNeighborList(); CpuNeighborList();
~CpuNeighborList();
void computeNeighborList(int numAtoms, const std::vector<float>& atomLocations, const std::vector<std::set<int> >& exclusions, void computeNeighborList(int numAtoms, const std::vector<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
const float* periodicBoxSize, bool usePeriodic, float maxDistance); const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads);
int getNumBlocks() const; int getNumBlocks() const;
const std::vector<int>& getSortedAtoms() const; const std::vector<int>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const; const std::vector<int>& getBlockNeighbors(int blockIndex) const;
...@@ -25,25 +55,12 @@ public: ...@@ -25,25 +55,12 @@ public:
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
*/ */
void threadComputeNeighborList(ThreadPool& threads, int threadIndex);
void runThread(int index); void runThread(int index);
private: private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void threadWait();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void advanceThreads();
bool isDeleted;
int numThreads, waitCount;
std::vector<int> sortedAtoms; std::vector<int> sortedAtoms;
std::vector<std::vector<int> > blockNeighbors; std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions; std::vector<std::vector<char> > blockExclusions;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz; float minx, maxx, miny, maxy, minz, maxz;
std::vector<std::pair<int, int> > atomBins; std::vector<std::pair<int, int> > atomBins;
...@@ -58,4 +75,4 @@ private: ...@@ -58,4 +75,4 @@ private:
} // namespace OpenMM } // namespace OpenMM
#endif // OPENMM_REFERENCE_NEIGHBORLIST_H_ #endif // OPENMM_CPU_NEIGHBORLIST_H_
...@@ -27,8 +27,8 @@ ...@@ -27,8 +27,8 @@
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "ReferencePairIxn.h" #include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include <pthread.h>
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -38,7 +38,7 @@ namespace OpenMM { ...@@ -38,7 +38,7 @@ namespace OpenMM {
class CpuNonbondedForce { class CpuNonbondedForce {
public: public:
class ThreadData; class ComputeDirectTask;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -48,14 +48,6 @@ class CpuNonbondedForce { ...@@ -48,14 +48,6 @@ class CpuNonbondedForce {
CpuNonbondedForce(); CpuNonbondedForce();
/**---------------------------------------------------------------------------------------
Destructor
--------------------------------------------------------------------------------------- */
~CpuNonbondedForce();
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use a cutoff. Set the force to use a cutoff.
...@@ -130,7 +122,7 @@ class CpuNonbondedForce { ...@@ -130,7 +122,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, std::vector<RealVec>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, float* totalEnergy) const; std::vector<RealVec>& forces, float* totalEnergy) const;
...@@ -140,21 +132,23 @@ class CpuNonbondedForce { ...@@ -140,21 +132,23 @@ class CpuNonbondedForce {
@param numberOfAtoms number of atoms @param numberOfAtoms number of atoms
@param posq atom coordinates and charges @param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon)) @param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices @param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, float* forces, float* totalEnergy); const std::vector<std::set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
*/ */
void runThread(int index, std::vector<float>& threadForce, double& threadEnergy); void threadComputeDirect(ThreadPool& threads, int threadIndex);
private: private:
bool cutoff; bool cutoff;
...@@ -171,15 +165,12 @@ private: ...@@ -171,15 +165,12 @@ private:
int meshDim[3]; int meshDim[3];
std::vector<float> ewaldScaleTable; std::vector<float> ewaldScaleTable;
float ewaldDX, ewaldDXInv; float ewaldDX, ewaldDXInv;
bool isDeleted; std::vector<std::vector<float> > threadForce;
int numThreads, waitCount; std::vector<double> threadEnergy;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
float* posq; float* posq;
RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions; std::set<int> const* exclusions;
bool includeEnergy; bool includeEnergy;
...@@ -230,6 +221,12 @@ private: ...@@ -230,6 +221,12 @@ private:
*/ */
void getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const; void getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
......
...@@ -203,11 +203,11 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -203,11 +203,11 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
// Convert the positions to single precision. // Convert the positions to single precision.
if (periodic) if (periodic || ewald || pme)
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j]; RealOpenMM x = posData[i][j];
double base = floor(x/boxSize[j]+0.5)*boxSize[j]; double base = floor(x/boxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base); posq[4*i+j] = (float) (x-base);
} }
else else
...@@ -244,6 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -244,6 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
int numMoved = moved.size(); int numMoved = moved.size();
double cutoff2 = nonbondedCutoff*nonbondedCutoff; double cutoff2 = nonbondedCutoff*nonbondedCutoff;
double paddedCutoff2 = (nonbondedCutoff+padding)*(nonbondedCutoff+padding);
for (int i = 1; i < numMoved && !needRecompute; i++) for (int i = 1; i < numMoved && !needRecompute; i++)
for (int j = 0; j < i; j++) { for (int j = 0; j < i; j++) {
RealVec delta = posData[moved[i]]-posData[moved[j]]; RealVec delta = posData[moved[i]]-posData[moved[j]];
...@@ -251,7 +252,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -251,7 +252,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
// These particles should interact. See if they are in the neighbor list. // These particles should interact. See if they are in the neighbor list.
RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]]; RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
if (oldDelta.dot(oldDelta) > cutoff2) { if (oldDelta.dot(oldDelta) > paddedCutoff2) {
needRecompute = true; needRecompute = true;
break; break;
} }
...@@ -259,7 +260,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -259,7 +260,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
if (needRecompute) { if (needRecompute) {
neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff+padding); neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff+padding, threads);
lastPositions = posData; lastPositions = posData;
} }
nonbonded.setUseCutoff(nonbondedCutoff, neighborList, rfDielectric); nonbonded.setUseCutoff(nonbondedCutoff, neighborList, rfDielectric);
...@@ -278,7 +279,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -278,7 +279,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded.setUseSwitchingFunction(switchingDistance); nonbonded.setUseSwitchingFunction(switchingDistance);
float nonbondedEnergy = 0; float nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, &forces[0], includeEnergy ? &nonbondedEnergy : NULL); nonbonded.calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, &forces[0], includeEnergy ? &nonbondedEnergy : NULL, threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &forces[0], numParticles); PmeIO io(&posq[0], &forces[0], numParticles);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
...@@ -23,8 +54,6 @@ public: ...@@ -23,8 +54,6 @@ public:
int y; int y;
}; };
typedef pair<const float*, int> VoxelItem;
/** /**
* This data structure organizes the particles spatially. It divides them into bins along the x and y axes, * This data structure organizes the particles spatially. It divides them into bins along the x and y axes,
* then sorts each bin along the z axis so ranges can be identified quickly with a binary search. * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
...@@ -60,7 +89,7 @@ public: ...@@ -60,7 +89,7 @@ public:
*/ */
void insert(const int& atom, const float* location) { void insert(const int& atom, const float* location) {
VoxelIndex voxelIndex = getVoxelIndex(location); VoxelIndex voxelIndex = getVoxelIndex(location);
bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], VoxelItem(location, atom))); bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], atom));
} }
/** /**
...@@ -76,7 +105,7 @@ public: ...@@ -76,7 +105,7 @@ public:
* Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value. * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
*/ */
int findLowerBound(int x, int y, double z) const { int findLowerBound(int x, int y, double z) const {
const vector<pair<float, VoxelItem> >& bin = bins[x][y]; const vector<pair<float, int> >& bin = bins[x][y];
int lower = 0; int lower = 0;
int upper = bin.size(); int upper = bin.size();
while (lower < upper) { while (lower < upper) {
...@@ -93,7 +122,7 @@ public: ...@@ -93,7 +122,7 @@ public:
* Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value. * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
*/ */
int findUpperBound(int x, int y, double z) const { int findUpperBound(int x, int y, double z) const {
const vector<pair<float, VoxelItem> >& bin = bins[x][y]; const vector<pair<float, int> >& bin = bins[x][y];
int lower = 0; int lower = 0;
int upper = bin.size(); int upper = bin.size();
while (lower < upper) { while (lower < upper) {
...@@ -148,14 +177,12 @@ public: ...@@ -148,14 +177,12 @@ public:
if (usePeriodic) { if (usePeriodic) {
endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1); endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1); endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
numRanges = 2;
} }
else { else {
startx = max(startx, 0); startx = max(startx, 0);
starty = max(starty, 0); starty = max(starty, 0);
endx = min(endx, nx-1); endx = min(endx, nx-1);
endy = min(endy, ny-1); endy = min(endy, ny-1);
numRanges = 1;
} }
int lastSortedIndex = BlockSize*(blockIndex+1); int lastSortedIndex = BlockSize*(blockIndex+1);
VoxelIndex voxelIndex(0, 0); VoxelIndex voxelIndex(0, 0);
...@@ -175,10 +202,12 @@ public: ...@@ -175,10 +202,12 @@ public:
float dz = maxDistance+blockWidth[2]; float dz = maxDistance+blockWidth[2];
dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy)); dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
bool needPeriodic = (voxelIndex.x != x || voxelIndex.y != y || centerPos[2]-dz < 0.0f || centerPos[2]+dz > periodicBoxSize[2]);
int rangeStart[2]; int rangeStart[2];
int rangeEnd[2]; int rangeEnd[2];
rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz); rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
if (usePeriodic) { if (needPeriodic) {
numRanges = 2;
rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz); rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
if (rangeStart[0] > 0) { if (rangeStart[0] > 0) {
rangeStart[1] = 0; rangeStart[1] = 0;
...@@ -189,22 +218,24 @@ public: ...@@ -189,22 +218,24 @@ public:
rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size(); rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
} }
} }
else else {
numRanges = 1;
rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz); rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
}
// Loop over atoms and check to see if they are neighbors of this block. // Loop over atoms and check to see if they are neighbors of this block.
for (int range = 0; range < numRanges; range++) { for (int range = 0; range < numRanges; range++) {
for (int item = rangeStart[range]; item < rangeEnd[range]; item++) { for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second.second; const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second;
// Avoid duplicate entries. // Avoid duplicate entries.
if (sortedIndex >= lastSortedIndex) if (sortedIndex >= lastSortedIndex)
continue; continue;
fvec4 atomPos(bins[voxelIndex.x][voxelIndex.y][item].second.first); fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
fvec4 delta = atomPos-centerPos; fvec4 delta = atomPos-centerPos;
if (usePeriodic) { if (needPeriodic) {
fvec4 base = round(delta*invBoxSize)*boxSize; fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base; delta = delta-base;
} }
...@@ -221,7 +252,7 @@ public: ...@@ -221,7 +252,7 @@ public:
for (int k = 0; k < (int) blockAtoms.size(); k++) { for (int k = 0; k < (int) blockAtoms.size(); k++) {
fvec4 pos1(&atomLocations[4*blockAtoms[k]]); fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
delta = atomPos-pos1; delta = atomPos-pos1;
if (usePeriodic) { if (needPeriodic) {
fvec4 base = round(delta*invBoxSize)*boxSize; fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base; delta = delta-base;
} }
...@@ -254,57 +285,24 @@ private: ...@@ -254,57 +285,24 @@ private:
int nx, ny; int nx, ny;
const float* periodicBoxSize; const float* periodicBoxSize;
const bool usePeriodic; const bool usePeriodic;
vector<vector<vector<pair<float, VoxelItem> > > > bins; vector<vector<vector<pair<float, int> > > > bins;
}; };
class CpuNeighborList::ThreadData { class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public: public:
ThreadData(int index, CpuNeighborList& owner) : index(index), owner(owner) { ThreadTask(CpuNeighborList& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeNeighborList(threads, threadIndex);
} }
int index;
CpuNeighborList& owner; CpuNeighborList& owner;
}; };
static void* threadBody(void* args) {
CpuNeighborList::ThreadData& data = *reinterpret_cast<CpuNeighborList::ThreadData*>(args);
data.owner.runThread(data.index);
delete &data;
return 0;
}
CpuNeighborList::CpuNeighborList() { CpuNeighborList::CpuNeighborList() {
isDeleted = false;
numThreads = getNumProcessors();
pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL);
pthread_mutex_init(&lock, NULL);
thread.resize(numThreads);
pthread_mutex_lock(&lock);
waitCount = 0;
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(i, *this);
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
}
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
}
CpuNeighborList::~CpuNeighborList() {
isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
pthread_join(thread[i], NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
} }
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions, void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
const float* periodicBoxSize, bool usePeriodic, float maxDistance) { const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads) {
int numBlocks = (numAtoms+BlockSize-1)/BlockSize; int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
blockNeighbors.resize(numBlocks); blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks); blockExclusions.resize(numBlocks);
...@@ -338,8 +336,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato ...@@ -338,8 +336,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Sort the atoms based on a Hilbert curve. // Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms); atomBins.resize(numAtoms);
pthread_mutex_lock(&lock); ThreadTask task(*this);
advanceThreads(); threads.execute(task);
threads.waitForThreads();
sort(atomBins.begin(), atomBins.end()); sort(atomBins.begin(), atomBins.end());
// Build the voxel hash. // Build the voxel hash.
...@@ -362,8 +361,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato ...@@ -362,8 +361,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
advanceThreads(); threads.resumeThreads();
pthread_mutex_unlock(&lock); threads.waitForThreads();
// Add padding atoms to fill up the last block. // Add padding atoms to fill up the last block.
...@@ -395,20 +394,14 @@ const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) con ...@@ -395,20 +394,14 @@ const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) con
} }
void CpuNeighborList::runThread(int index) { void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
while (true) {
// Wait for the signal to start running.
threadWait();
if (isDeleted)
break;
// Compute the positions of atoms along the Hilbert curve. // Compute the positions of atoms along the Hilbert curve.
float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f; float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
float invBinWidth = 1.0f/binWidth; float invBinWidth = 1.0f/binWidth;
bitmask_t coords[3]; bitmask_t coords[3];
for (int i = index; i < numAtoms; i += numThreads) { int numThreads = threads.getNumThreads();
for (int i = threadIndex; i < numAtoms; i += numThreads) {
const float* pos = &atomLocations[4*i]; const float* pos = &atomLocations[4*i];
coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth); coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth); coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
...@@ -416,26 +409,22 @@ void CpuNeighborList::runThread(int index) { ...@@ -416,26 +409,22 @@ void CpuNeighborList::runThread(int index) {
int bin = (int) hilbert_c2i(3, 8, coords); int bin = (int) hilbert_c2i(3, 8, coords);
atomBins[i] = pair<int, int>(bin, i); atomBins[i] = pair<int, int>(bin, i);
} }
threadWait(); threads.syncThreads();
// Compute this thread's subset of neighbors. // Compute this thread's subset of neighbors.
int numBlocks = blockNeighbors.size(); int numBlocks = blockNeighbors.size();
vector<int> blockAtoms; vector<int> blockAtoms;
for (int i = index; i < numBlocks; i += numThreads) { for (int i = threadIndex; i < numBlocks; i += numThreads) {
{ // Find the atoms in this block and compute their bounding box.
int firstIndex = BlockSize*i; int firstIndex = BlockSize*i;
int atomsInBlock = min(BlockSize, numAtoms-firstIndex); int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
blockAtoms.resize(atomsInBlock); blockAtoms.resize(atomsInBlock);
for (int j = 0; j < atomsInBlock; j++) for (int j = 0; j < atomsInBlock; j++)
blockAtoms[j] = sortedAtoms[firstIndex+j]; blockAtoms[j] = sortedAtoms[firstIndex+j];
}
int firstIndex = BlockSize*i;
fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]); fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
fvec4 maxPos = minPos; fvec4 maxPos = minPos;
int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
for (int j = 1; j < atomsInBlock; j++) { for (int j = 1; j < atomsInBlock; j++) {
fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]); fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
minPos = min(minPos, pos); minPos = min(minPos, pos);
...@@ -455,23 +444,6 @@ void CpuNeighborList::runThread(int index) { ...@@ -455,23 +444,6 @@ void CpuNeighborList::runThread(int index) {
} }
} }
} }
}
}
void CpuNeighborList::threadWait() {
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
}
void CpuNeighborList::advanceThreads() {
waitCount = 0;
pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads) {
pthread_cond_wait(&endCondition, &lock);
}
} }
} // namespace OpenMM } // namespace OpenMM
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/SplineFitter.h" #include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
...@@ -44,23 +43,16 @@ using namespace OpenMM; ...@@ -44,23 +43,16 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 1025; const int CpuNonbondedForce::NUM_TABLE_POINTS = 1025;
class CpuNonbondedForce::ThreadData { class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public: public:
ThreadData(int index, CpuNonbondedForce& owner) : index(index), owner(owner) { ComputeDirectTask(CpuNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeDirect(threads, threadIndex);
} }
int index;
CpuNonbondedForce& owner; CpuNonbondedForce& owner;
vector<float> threadForce;
double threadEnergy;
}; };
static void* threadBody(void* args) {
CpuNonbondedForce::ThreadData& data = *reinterpret_cast<CpuNonbondedForce::ThreadData*>(args);
data.owner.runThread(data.index, data.threadForce, data.threadEnergy);
delete &data;
return 0;
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor CpuNonbondedForce constructor
...@@ -68,40 +60,6 @@ static void* threadBody(void* args) { ...@@ -68,40 +60,6 @@ static void* threadBody(void* args) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) {
isDeleted = false;
numThreads = getNumProcessors();
pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL);
pthread_mutex_init(&lock, NULL);
thread.resize(numThreads);
pthread_mutex_lock(&lock);
waitCount = 0;
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(i, *this);
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
}
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce destructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::~CpuNonbondedForce(){
isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
pthread_join(thread[i], NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -219,7 +177,7 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() { ...@@ -219,7 +177,7 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
} }
} }
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
vector<RealVec>& forces, float* totalEnergy) const { vector<RealVec>& forces, float* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
...@@ -333,34 +291,35 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v ...@@ -333,34 +291,35 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
} }
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, float* forces, float* totalEnergy) { const vector<set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
threadEnergy.resize(threads.getNumThreads());
threadForce.resize(threads.getNumThreads());
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
pthread_mutex_lock(&lock); ComputeDirectTask task(*this);
waitCount = 0; threads.execute(task);
pthread_cond_broadcast(&startCondition); threads.waitForThreads();
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
// Combine the results from all the threads. // Combine the results from all the threads.
double directEnergy = 0; double directEnergy = 0;
int numThreads = threads.getNumThreads();
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
directEnergy += threadData[i]->threadEnergy; directEnergy += threadEnergy[i];
for (int i = 0; i < numberOfAtoms; i++) { for (int i = 0; i < numberOfAtoms; i++) {
fvec4 f(forces+4*i); fvec4 f(forces+4*i);
for (int j = 0; j < numThreads; j++) for (int j = 0; j < numThreads; j++)
f += fvec4(&threadData[j]->threadForce[4*i]); f += fvec4(&threadForce[j][4*i]);
f.store(forces+4*i); f.store(forces+4*i);
} }
...@@ -368,45 +327,35 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -368,45 +327,35 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
*totalEnergy += (float) directEnergy; *totalEnergy += (float) directEnergy;
} }
void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex) {
void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& threadEnergy) {
while (true) {
// Wait for the signal to start running.
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
if (isDeleted)
break;
// Compute this thread's subset of interactions. // Compute this thread's subset of interactions.
threadEnergy = 0; int numThreads = threads.getNumThreads();
double* energyPtr = (includeEnergy ? &threadEnergy : NULL); threadEnergy[threadIndex] = 0;
threadForce.resize(4*numberOfAtoms, 0.0f); double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL);
threadForce[threadIndex].resize(4*numberOfAtoms, 0.0f);
float* forces = &threadForce[threadIndex][0];
for (int i = 0; i < 4*numberOfAtoms; i++) for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f; forces[i] = 0.0f;
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0); fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
if (ewald || pme) { if (ewald || pme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
for (int i = index; i < neighborList->getNumBlocks(); i += numThreads) for (int i = threadIndex; i < neighborList->getNumBlocks(); i += numThreads)
calculateBlockEwaldIxn(i, &threadForce[0], energyPtr, boxSize, invBoxSize); calculateBlockEwaldIxn(i, forces, energyPtr, boxSize, invBoxSize);
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum. // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0); fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
for (int i = index; i < numberOfAtoms; i += numThreads) for (int i = threadIndex; i < numberOfAtoms; i += numThreads) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) { if (*iter > i) {
int j = *iter; int j = *iter;
fvec4 deltaR; fvec4 deltaR;
fvec4 posI(posq+4*i); fvec4 posJ((float) atomCoordinates[j][0], (float) atomCoordinates[j][1], (float) atomCoordinates[j][2], 0.0f);
fvec4 posJ(posq+4*j);
float r2; float r2;
getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize); getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
float r = sqrtf(r2); float r = sqrtf(r2);
...@@ -417,27 +366,27 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& ...@@ -417,27 +366,27 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
float dEdR = (float) (chargeProd * inverseR * inverseR * inverseR); float dEdR = (float) (chargeProd * inverseR * inverseR * inverseR);
dEdR = (float) (dEdR * (1.0f-erfcAlphaR-TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR))); dEdR = (float) (dEdR * (1.0f-erfcAlphaR-TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR)));
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(&threadForce[4*i])-result).store(&threadForce[4*i]); (fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(&threadForce[4*j])+result).store(&threadForce[4*j]); (fvec4(forces+4*j)+result).store(forces+4*j);
if (includeEnergy) if (includeEnergy)
threadEnergy -= chargeProd*inverseR*(1.0f-erfcAlphaR); threadEnergy[threadIndex] -= chargeProd*inverseR*(1.0f-erfcAlphaR);
}
} }
} }
} }
else if (cutoff) { else if (cutoff) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
for (int i = index; i < neighborList->getNumBlocks(); i += numThreads) for (int i = threadIndex; i < neighborList->getNumBlocks(); i += numThreads)
calculateBlockIxn(i, &threadForce[0], energyPtr, boxSize, invBoxSize); calculateBlockIxn(i, forces, energyPtr, boxSize, invBoxSize);
} }
else { else {
// Loop over all atom pairs // Loop over all atom pairs
for (int i = index; i < numberOfAtoms; i += numThreads){ for (int i = threadIndex; i < numberOfAtoms; i += numThreads){
for (int j = i+1; j < numberOfAtoms; j++) for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[j].find(i) == exclusions[j].end()) if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(i, j, &threadForce[0], energyPtr, boxSize, invBoxSize); calculateOneIxn(i, j, forces, energyPtr, boxSize, invBoxSize);
}
} }
} }
} }
...@@ -507,17 +456,27 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double* ...@@ -507,17 +456,27 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
blockAtomForce[i] = fvec4(0.0f); blockAtomForce[i] = fvec4(0.0f);
} }
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = false;
if (periodic) {
for (int i = 0; i < 4 && !needPeriodic; i++)
for (int j = 0; j < 3; j++)
if (blockAtomPosq[i][j]-cutoffDistance < 0.0 || blockAtomPosq[i][j]+cutoffDistance > boxSize[j]) {
needPeriodic = true;
break;
}
}
// Loop over neighbors for this block. // Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
float blockAtomR2[4];
bool include[4]; bool include[4];
fvec4 blockAtomDelta[4];
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
// Load the next neighbor. // Load the next neighbor.
...@@ -527,9 +486,10 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double* ...@@ -527,9 +486,10 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
bool any = false; bool any = false;
fvec4 dx, dy, dz, r2;
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
getDeltaR(atomPosq, blockAtomPosq[j], blockAtomDelta[j], blockAtomR2[j], periodic, boxSize, invBoxSize); include[j] = (((exclusions[i]>>j)&1) == 0 && (!cutoff || r2[j] < cutoffDistance*cutoffDistance));
include[j] = (((exclusions[i]>>j)&1) == 0 && (!cutoff || blockAtomR2[j] < cutoffDistance*cutoffDistance));
any |= include[j]; any |= include[j];
} }
if (!any) if (!any)
...@@ -537,7 +497,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double* ...@@ -537,7 +497,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the interactions. // Compute the interactions.
fvec4 r2(blockAtomR2);
fvec4 r = sqrt(r2); fvec4 r = sqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r; fvec4 inverseR = fvec4(1.0f)/r;
fvec4 switchValue(1.0f), switchDeriv(0.0f); fvec4 switchValue(1.0f), switchDeriv(0.0f);
...@@ -578,12 +537,13 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double* ...@@ -578,12 +537,13 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Accumulate forces. // Accumulate forces.
fvec4 result[4] = {dx*dEdR, dy*dEdR, dz*dEdR, 0.0f};
transpose(result[0], result[1], result[2], result[3]);
fvec4 atomForce(forces+4*atom); fvec4 atomForce(forces+4*atom);
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
if (include[j]) { if (include[j]) {
fvec4 result = blockAtomDelta[j]*dEdR[j]; blockAtomForce[j] += result[j];
blockAtomForce[j] += result; atomForce -= result[j];
atomForce -= result;
} }
} }
atomForce.store(forces+4*atom); atomForce.store(forces+4*atom);
...@@ -606,17 +566,25 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do ...@@ -606,17 +566,25 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
blockAtomForce[i] = fvec4(0.0f); blockAtomForce[i] = fvec4(0.0f);
} }
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = false;
for (int i = 0; i < 4 && !needPeriodic; i++)
for (int j = 0; j < 3; j++)
if (blockAtomPosq[i][j]-cutoffDistance < 0.0 || blockAtomPosq[i][j]+cutoffDistance > boxSize[j]) {
needPeriodic = true;
break;
}
// Loop over neighbors for this block. // Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
float blockAtomR2[4];
bool include[4]; bool include[4];
fvec4 blockAtomDelta[4];
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
// Load the next neighbor. // Load the next neighbor.
...@@ -626,9 +594,10 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do ...@@ -626,9 +594,10 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
bool any = false; bool any = false;
fvec4 dx, dy, dz, r2;
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
getDeltaR(atomPosq, blockAtomPosq[j], blockAtomDelta[j], blockAtomR2[j], periodic, boxSize, invBoxSize); include[j] = (((exclusions[i]>>j)&1) == 0 && r2[j] < cutoffDistance*cutoffDistance);
include[j] = (((exclusions[i]>>j)&1) == 0 && blockAtomR2[j] < cutoffDistance*cutoffDistance);
any |= include[j]; any |= include[j];
} }
if (!any) if (!any)
...@@ -636,7 +605,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do ...@@ -636,7 +605,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the interactions. // Compute the interactions.
fvec4 r2(blockAtomR2);
fvec4 r = sqrt(r2); fvec4 r = sqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r; fvec4 inverseR = fvec4(1.0f)/r;
fvec4 switchValue(1.0f), switchDeriv(0.0f); fvec4 switchValue(1.0f), switchDeriv(0.0f);
...@@ -671,12 +639,13 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do ...@@ -671,12 +639,13 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Accumulate forces. // Accumulate forces.
fvec4 result[4] = {dx*dEdR, dy*dEdR, dz*dEdR, 0.0f};
transpose(result[0], result[1], result[2], result[3]);
fvec4 atomForce(forces+4*atom); fvec4 atomForce(forces+4*atom);
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
if (include[j]) { if (include[j]) {
fvec4 result = blockAtomDelta[j]*dEdR[j]; blockAtomForce[j] += result[j];
blockAtomForce[j] += result; atomForce -= result[j];
atomForce -= result;
} }
} }
atomForce.store(forces+4*atom); atomForce.store(forces+4*atom);
...@@ -697,6 +666,18 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d ...@@ -697,6 +666,18 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2 = dot3(deltaR, deltaR); r2 = dot3(deltaR, deltaR);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (periodic) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
r2 = dx*dx + dy*dy + dz*dz;
}
fvec4 CpuNonbondedForce::erfcApprox(fvec4 x) { fvec4 CpuNonbondedForce::erfcApprox(fvec4 x) {
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
...@@ -720,7 +701,7 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) { ...@@ -720,7 +701,7 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
coeff[0] = 1.0f-coeff[1]; coeff[0] = 1.0f-coeff[1];
coeff[2] = coeff[0]*coeff[0]*coeff[0]-coeff[0]; coeff[2] = coeff[0]*coeff[0]*coeff[0]-coeff[0];
coeff[3] = coeff[1]*coeff[1]*coeff[1]-coeff[1]; coeff[3] = coeff[1]*coeff[1]*coeff[1]-coeff[1];
_MM_TRANSPOSE4_PS(coeff[0], coeff[1], coeff[2], coeff[3]); transpose(coeff[0], coeff[1], coeff[2], coeff[3]);
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (index[i] < NUM_TABLE_POINTS) if (index[i] < NUM_TABLE_POINTS)
y[i] = dot4(coeff[i], fvec4(&ewaldScaleTable[4*index[i]])); y[i] = dot4(coeff[i], fvec4(&ewaldScaleTable[4*index[i]]));
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
*/ */
#include "openmm/internal/AssertionUtilities.h" #include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/ThreadPool.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "CpuPlatform.h" #include "CpuPlatform.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
...@@ -63,8 +64,9 @@ void testNeighborList(bool periodic) { ...@@ -63,8 +64,9 @@ void testNeighborList(bool periodic) {
exclusions[i-j].insert(i); exclusions[i-j].insert(i);
} }
} }
ThreadPool threads;
CpuNeighborList neighborList; CpuNeighborList neighborList;
neighborList.computeNeighborList(numParticles, positions, exclusions, boxSize, periodic, cutoff); neighborList.computeNeighborList(numParticles, positions, exclusions, boxSize, periodic, cutoff, threads);
// Convert the neighbor list to a set for faster lookup. // Convert the neighbor list to a set for faster lookup.
......
...@@ -397,44 +397,44 @@ void testLargeSystem() { ...@@ -397,44 +397,44 @@ void testLargeSystem() {
system.addForce(bonds); system.addForce(bonds);
VerletIntegrator integrator1(0.01); VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01); VerletIntegrator integrator2(0.01);
Context cuContext(system, integrator1, platform); Context cpuContext(system, integrator1, platform);
Context referenceContext(system, integrator2, reference); Context referenceContext(system, integrator2, reference);
cuContext.setPositions(positions); cpuContext.setPositions(positions);
cuContext.setVelocities(velocities); cpuContext.setVelocities(velocities);
referenceContext.setPositions(positions); referenceContext.setPositions(positions);
referenceContext.setVelocities(velocities); referenceContext.setVelocities(velocities);
State cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); State cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
State referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); State referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(cuState.getPositions()[i], referenceState.getPositions()[i], tol); ASSERT_EQUAL_VEC(cpuState.getPositions()[i], referenceState.getPositions()[i], tol);
ASSERT_EQUAL_VEC(cuState.getVelocities()[i], referenceState.getVelocities()[i], tol); ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol); ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
} }
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
// Now do the same thing with periodic boundary conditions. // Now do the same thing with periodic boundary conditions.
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic); nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
cuContext.reinitialize(); cpuContext.reinitialize();
referenceContext.reinitialize(); referenceContext.reinitialize();
cuContext.setPositions(positions); cpuContext.setPositions(positions);
cuContext.setVelocities(velocities); cpuContext.setVelocities(velocities);
referenceContext.setPositions(positions); referenceContext.setPositions(positions);
referenceContext.setVelocities(velocities); referenceContext.setVelocities(velocities);
cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double dx = cuState.getPositions()[i][0]-referenceState.getPositions()[i][0]; double dx = cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = cuState.getPositions()[i][1]-referenceState.getPositions()[i][1]; double dy = cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = cuState.getPositions()[i][2]-referenceState.getPositions()[i][2]; double dz = cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
ASSERT_EQUAL_VEC(cuState.getVelocities()[i], referenceState.getVelocities()[i], tol); ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol); ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
} }
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
} }
void testDispersionCorrection() { void testDispersionCorrection() {
...@@ -542,15 +542,15 @@ void testChangingParameters() { ...@@ -542,15 +542,15 @@ void testChangingParameters() {
VerletIntegrator integrator1(0.01); VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01); VerletIntegrator integrator2(0.01);
Context cuContext(system, integrator1, platform); Context cpuContext(system, integrator1, platform);
Context referenceContext(system, integrator2, reference); Context referenceContext(system, integrator2, reference);
cuContext.setPositions(positions); cpuContext.setPositions(positions);
referenceContext.setPositions(positions); referenceContext.setPositions(positions);
State cuState = cuContext.getState(State::Forces | State::Energy); State cpuState = cpuContext.getState(State::Forces | State::Energy);
State referenceState = referenceContext.getState(State::Forces | State::Energy); State referenceState = referenceContext.getState(State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol); ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
// Now modify parameters and see if they still agree. // Now modify parameters and see if they still agree.
...@@ -559,13 +559,13 @@ void testChangingParameters() { ...@@ -559,13 +559,13 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon); nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon); nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
} }
nonbonded->updateParametersInContext(cuContext); nonbonded->updateParametersInContext(cpuContext);
nonbonded->updateParametersInContext(referenceContext); nonbonded->updateParametersInContext(referenceContext);
cuState = cuContext.getState(State::Forces | State::Energy); cpuState = cpuContext.getState(State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Forces | State::Energy); referenceState = referenceContext.getState(State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol); ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
} }
void testSwitchingFunction(NonbondedForce::NonbondedMethod method) { void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
......
...@@ -75,9 +75,9 @@ pme_init(pme_t * ppme, ...@@ -75,9 +75,9 @@ pme_init(pme_t * ppme,
*/ */
int int
pme_exec(pme_t pme, pme_exec(pme_t pme,
std::vector<OpenMM::RealVec>& atomCoordinates, const std::vector<OpenMM::RealVec>& atomCoordinates,
std::vector<OpenMM::RealVec>& forces, std::vector<OpenMM::RealVec>& forces,
std::vector<RealOpenMM>& charges, const std::vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy, RealOpenMM * energy,
RealOpenMM pme_virial[3][3]); RealOpenMM pme_virial[3][3]);
......
...@@ -195,7 +195,7 @@ pme_calculate_bsplines_moduli(pme_t pme) ...@@ -195,7 +195,7 @@ pme_calculate_bsplines_moduli(pme_t pme)
static void static void
pme_update_grid_index_and_fraction(pme_t pme, pme_update_grid_index_and_fraction(pme_t pme,
vector<RealVec>& atomCoordinates, const vector<RealVec>& atomCoordinates,
const RealOpenMM periodicBoxSize[3]) const RealOpenMM periodicBoxSize[3])
{ {
int i; int i;
...@@ -317,7 +317,7 @@ pme_update_bsplines(pme_t pme) ...@@ -317,7 +317,7 @@ pme_update_bsplines(pme_t pme)
static void static void
pme_grid_spread_charge(pme_t pme, vector<RealOpenMM>& charges) pme_grid_spread_charge(pme_t pme, const vector<RealOpenMM>& charges)
{ {
int order; int order;
int i; int i;
...@@ -521,7 +521,7 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -521,7 +521,7 @@ pme_reciprocal_convolution(pme_t pme,
static void static void
pme_grid_interpolate_force(pme_t pme, pme_grid_interpolate_force(pme_t pme,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
vector<RealOpenMM>& charges, const vector<RealOpenMM>& charges,
vector<RealVec>& forces) vector<RealVec>& forces)
{ {
int i; int i;
...@@ -666,11 +666,11 @@ pme_init(pme_t * ppme, ...@@ -666,11 +666,11 @@ pme_init(pme_t * ppme,
int pme_exec(pme_t pme, int pme_exec(pme_t pme,
vector<RealVec>& atomCoordinates, const vector<RealVec>& atomCoordinates,
vector<RealVec>& forces, vector<RealVec>& forces,
vector<RealOpenMM>& charges, const vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3], const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy, RealOpenMM* energy,
RealOpenMM pme_virial[3][3]) RealOpenMM pme_virial[3][3])
{ {
/* Routine is called with coordinates in x, a box, and charges in q */ /* Routine is called with coordinates in x, a box, and charges in q */
......
...@@ -40,7 +40,7 @@ import simtk.unit as unit ...@@ -40,7 +40,7 @@ import simtk.unit as unit
import simtk.openmm as mm import simtk.openmm as mm
import math import math
import os import os
import distutils import distutils.spawn
HBonds = ff.HBonds HBonds = ff.HBonds
AllBonds = ff.AllBonds AllBonds = ff.AllBonds
...@@ -358,7 +358,7 @@ class GromacsTopFile(object): ...@@ -358,7 +358,7 @@ class GromacsTopFile(object):
raise ValueError('Unsupported function type in [ cmaptypes ] line: '+line); raise ValueError('Unsupported function type in [ cmaptypes ] line: '+line);
self._cmapTypes[tuple(fields[:5])] = fields self._cmapTypes[tuple(fields[:5])] = fields
def __init__(self, file, unitCellDimensions=None, includeDir=None, defines={}): def __init__(self, file, unitCellDimensions=None, includeDir=None, defines=None):
"""Load a top file. """Load a top file.
Parameters: Parameters:
...@@ -368,12 +368,18 @@ class GromacsTopFile(object): ...@@ -368,12 +368,18 @@ class GromacsTopFile(object):
included from the top file. If not specified, we will attempt to locate a gromacs included from the top file. If not specified, we will attempt to locate a gromacs
installation on your system. When gromacs is installed in /usr/local, this will resolve installation on your system. When gromacs is installed in /usr/local, this will resolve
to /usr/local/gromacs/share/gromacs/top to /usr/local/gromacs/share/gromacs/top
- defines (map={}) preprocessor definitions that should be predefined when parsing the file - defines (dict={}) preprocessor definitions that should be predefined when parsing the file
""" """
if includeDir is None: if includeDir is None:
includeDir = _defaultGromacsIncludeDir() includeDir = _defaultGromacsIncludeDir()
self._includeDirs = (os.path.dirname(file), includeDir) self._includeDirs = (os.path.dirname(file), includeDir)
self._defines = defines # Most of the gromacs water itp files for different forcefields,
# unless the preprocessor #define FLEXIBLE is given, don't define
# bonds between the water hydrogen and oxygens, but only give the
# constraint distances and exclusions.
self._defines = {'FLEXIBLE': True}
if defines is not None:
self._defines.update(defines)
# Parse the file. # Parse the file.
......
...@@ -616,6 +616,7 @@ EXCLUDE_SYMLINKS = NO ...@@ -616,6 +616,7 @@ EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = */tests/* \ EXCLUDE_PATTERNS = */tests/* \
*/openmmapi/src/* \ */openmmapi/src/* \
*/internal/* \
*/.svn/* \ */.svn/* \
*OpenMMFortranModule.f90 \ *OpenMMFortranModule.f90 \
*OpenMMCWrapper.h *OpenMMCWrapper.h
......
#!/bin/env python #!/usr/bin/env python
# #
# #
......
...@@ -14,20 +14,7 @@ DOC_STRINGS = {("Context", "setPositions") : ...@@ -14,20 +14,7 @@ DOC_STRINGS = {("Context", "setPositions") :
# Do not generate wrappers for the following methods. # Do not generate wrappers for the following methods.
# Indexed by (className, [methodName [, numParams]]) # Indexed by (className, [methodName [, numParams]])
SKIP_METHODS = [('State',), SKIP_METHODS = [('State',),
('Stream',),
('Vec3',), ('Vec3',),
('AmoebaGeneralizedKirkwoodForceImpl',),
('AmoebaAngleForceImpl',),
('AmoebaBondForceImpl',),
('AmoebaInPlaneAngleForceImpl',),
('AmoebaMultipoleForceImpl',),
('AmoebaOutOfPlaneBendForceImpl',),
('AmoebaPiTorsionForceImpl',),
('AmoebaStretchBendForceImpl',),
('AmoebaTorsionTorsionForceImpl',),
('AmoebaVdwForceImpl',),
('AmoebaWcaDispersionForceImpl',),
('AndersenThermostatImpl',),
('AngleInfo',), ('AngleInfo',),
('ApplyAndersenThermostatKernel',), ('ApplyAndersenThermostatKernel',),
('ApplyConstraintsKernel',), ('ApplyConstraintsKernel',),
...@@ -63,30 +50,14 @@ SKIP_METHODS = [('State',), ...@@ -63,30 +50,14 @@ SKIP_METHODS = [('State',),
('CalcNonbondedForceKernel',), ('CalcNonbondedForceKernel',),
('CalcPeriodicTorsionForceKernel',), ('CalcPeriodicTorsionForceKernel',),
('CalcRBTorsionForceKernel',), ('CalcRBTorsionForceKernel',),
('CMAPTorsionForceImpl',),
('CMMotionRemoverImpl',),
('ComputationInfo',), ('ComputationInfo',),
('ConstraintInfo',), ('ConstraintInfo',),
('ContextImpl',),
('CudaKernelFactory',), ('CudaKernelFactory',),
('CudaStreamFactory',), ('CudaStreamFactory',),
('CustomAngleForceImpl',),
('CustomBondForceImpl',),
('CustomCompoundBondForceImpl',),
('CustomExternalForceImpl',),
('CustomGBForceImpl',),
('CustomHbondForceImpl',),
('CustomNonbondedForceImpl',),
('CustomTorsionForceImpl',),
('ExceptionInfo',), ('ExceptionInfo',),
('ExclusionInfo',), ('ExclusionInfo',),
('ForceImpl',),
('FunctionInfo',), ('FunctionInfo',),
('GBSAOBCForceImpl',),
('GBVIForceImpl',),
('GlobalParameterInfo',), ('GlobalParameterInfo',),
('HarmonicAngleForceImpl',),
('HarmonicBondForceImpl',),
('InitializeForcesKernel',), ('InitializeForcesKernel',),
('IntegrateBrownianStepKernel',), ('IntegrateBrownianStepKernel',),
('IntegrateLangevinStepKernel',), ('IntegrateLangevinStepKernel',),
...@@ -97,24 +68,18 @@ SKIP_METHODS = [('State',), ...@@ -97,24 +68,18 @@ SKIP_METHODS = [('State',),
('Kernel',), ('Kernel',),
('KernelFactory',), ('KernelFactory',),
('KernelImpl',), ('KernelImpl',),
('MonteCarloBarostatImpl',),
('MonteCarloAnisotropicBarostatImpl',),
('MultipoleInfo',), ('MultipoleInfo',),
('NonbondedForceImpl',),
('OutOfPlaneBendInfo',), ('OutOfPlaneBendInfo',),
('ParameterInfo',), ('ParameterInfo',),
('ParticleInfo',), ('ParticleInfo',),
('PeriodicTorsionForceImpl',),
('PeriodicTorsionInfo',), ('PeriodicTorsionInfo',),
('PerParticleParameterInfo',), ('PerParticleParameterInfo',),
('PiTorsionInfo',), ('PiTorsionInfo',),
('PlatformData',), ('PlatformData',),
('RBTorsionForceImpl',),
('RBTorsionInfo',), ('RBTorsionInfo',),
('RemoveCMMotionKernel',), ('RemoveCMMotionKernel',),
('SplineFitter',), ('SplineFitter',),
('StreamFactory',), ('StreamFactory',),
('StreamImpl',),
('StretchBendInfo',), ('StretchBendInfo',),
('TorsionInfo',), ('TorsionInfo',),
('TorsionTorsionGridInfo',), ('TorsionTorsionGridInfo',),
...@@ -139,14 +104,11 @@ SKIP_METHODS = [('State',), ...@@ -139,14 +104,11 @@ SKIP_METHODS = [('State',),
('Platform', 'registerKernelFactory'), ('Platform', 'registerKernelFactory'),
('IntegrateRPMDStepKernel',), ('IntegrateRPMDStepKernel',),
('RPMDIntegrator', 'getState'), ('RPMDIntegrator', 'getState'),
('DrudeForceImpl',),
('CalcDrudeForceKernel',), ('CalcDrudeForceKernel',),
('IntegrateDrudeLangevinStepKernel',), ('IntegrateDrudeLangevinStepKernel',),
('IntegrateDrudeSCFStepKernel',), ('IntegrateDrudeSCFStepKernel',),
('XmlSerializer', 'serialize'), ('XmlSerializer', 'serialize'),
('XmlSerializer', 'deserialize'), ('XmlSerializer', 'deserialize'),
('fvec4',),
('ivec4',),
] ]
# The build script assumes method args that are non-const references are # The build script assumes method args that are non-const references are
...@@ -161,6 +123,8 @@ NO_OUTPUT_ARGS = [('LocalEnergyMinimizer', 'minimize', 'context'), ...@@ -161,6 +123,8 @@ NO_OUTPUT_ARGS = [('LocalEnergyMinimizer', 'minimize', 'context'),
('AmoebaMultipoleForce', 'addParticle', 'molecularDipole'), ('AmoebaMultipoleForce', 'addParticle', 'molecularDipole'),
('AmoebaMultipoleForce', 'addParticle', 'molecularQuadrupole'), ('AmoebaMultipoleForce', 'addParticle', 'molecularQuadrupole'),
('AmoebaMultipoleForce', 'setCovalentMap', 'covalentAtoms'), ('AmoebaMultipoleForce', 'setCovalentMap', 'covalentAtoms'),
('AmoebaMultipoleForce', 'getElectrostaticPotential', 'context'),
('AmoebaMultipoleForce', 'getInducedDipoles', 'context'),
] ]
# SWIG assumes the target language shadow class owns the C++ class # SWIG assumes the target language shadow class owns the C++ class
...@@ -285,6 +249,7 @@ UNITS = { ...@@ -285,6 +249,7 @@ UNITS = {
#("AmoebaMultipoleForce", "getElectrostaticPotential") : ( None, ('unit.kilojoule_per_mole')), #("AmoebaMultipoleForce", "getElectrostaticPotential") : ( None, ('unit.kilojoule_per_mole')),
#("AmoebaMultipoleForce", "getElectrostaticPotential") : ( ('unit.kilojoule_per_mole'), ()), #("AmoebaMultipoleForce", "getElectrostaticPotential") : ( ('unit.kilojoule_per_mole'), ()),
("AmoebaMultipoleForce", "getElectrostaticPotential") : ( None, ()), ("AmoebaMultipoleForce", "getElectrostaticPotential") : ( None, ()),
("AmoebaMultipoleForce", "getInducedDipoles") : ( None, ()),
("AmoebaMultipoleForce", "getSystemMultipoleMoments") : ( None, ()), ("AmoebaMultipoleForce", "getSystemMultipoleMoments") : ( None, ()),
("AmoebaOutOfPlaneBendForce", "getNumOutOfPlaneBends") : ( None, ()), ("AmoebaOutOfPlaneBendForce", "getNumOutOfPlaneBends") : ( None, ()),
......
/* Convert python list of tuples to C++ std::vector of Vec3 objects */ /* Convert python list of tuples to C++ std::vector of Vec3 objects */
%typemap(in) std::vector<Vec3>& (std::vector<OpenMM::Vec3> vVec) { %typemap(in) const std::vector<Vec3>& (std::vector<OpenMM::Vec3> vVec) {
// typemap -- %typemap(in) std::vector<Vec3>& (std::vector<OpenMM::Vec3> vVec) // typemap -- %typemap(in) std::vector<Vec3>& (std::vector<OpenMM::Vec3> vVec)
int i, pLength, itemLength; int i, pLength, itemLength;
double x, y, z; double x, y, z;
...@@ -34,6 +34,32 @@ ...@@ -34,6 +34,32 @@
$1 = &vVec; $1 = &vVec;
} }
/* The following two typemaps cause a non-const vector<Vec3>& to become a return value. */
%typemap(in, numinputs=0) std::vector<Vec3>& (std::vector<Vec3> temp) {
$1 = &temp;
}
%typemap(argout) std::vector<Vec3>& {
int i, n;
PyObject *pyList;
n=(*$1).size();
pyList=PyList_New(n);
PyObject* mm = PyImport_AddModule("simtk.openmm");
PyObject* vec3 = PyObject_GetAttrString(mm, "Vec3");
for (i=0; i<n; i++) {
OpenMM::Vec3& v = (*$1).at(i);
PyObject* args = Py_BuildValue("(d,d,d)", v[0], v[1], v[2]);
PyObject* pyVec = PyObject_CallObject(vec3, args);
Py_DECREF(args);
PyList_SET_ITEM(pyList, i, pyVec);
}
$result = pyList;
}
/* const vector<Vec3> should NOT become an output. */
%typemap(argout) const std::vector<Vec3>& {
}
/* Convert python tuple to C++ Vec3 object*/ /* Convert python tuple to C++ Vec3 object*/
%typemap(typecheck) Vec3 { %typemap(typecheck) Vec3 {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment