Commit 8b44287e authored by Jason Swails's avatar Jason Swails
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents 25580770 291484f2
...@@ -90,8 +90,10 @@ namespace OpenMM_SFMT { ...@@ -90,8 +90,10 @@ namespace OpenMM_SFMT {
class SFMTData { class SFMTData {
public: public:
/** Possibly incorrectly aligned memory for internal state array */
char baseData[(N+1)*sizeof(w128_t)];
/** the 128-bit internal state array */ /** the 128-bit internal state array */
w128_t sfmt[N]; w128_t* sfmt;
/** the 32bit integer pointer to the 128-bit internal state array */ /** the 32bit integer pointer to the 128-bit internal state array */
uint32_t *psfmt32; uint32_t *psfmt32;
#if !defined(BIG_ENDIAN64) || defined(ONLY64) #if !defined(BIG_ENDIAN64) || defined(ONLY64)
...@@ -106,6 +108,9 @@ public: ...@@ -106,6 +108,9 @@ public:
/** a parity check vector which certificate the period of 2^{MEXP} */ /** a parity check vector which certificate the period of 2^{MEXP} */
uint32_t parity[4]; uint32_t parity[4];
SFMTData() { SFMTData() {
char* offsetData = baseData+15;
offsetData -= (long long)offsetData&0xF;
sfmt = (w128_t*) offsetData;
psfmt32 = &sfmt[0].u[0]; psfmt32 = &sfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64) #if !defined(BIG_ENDIAN64) || defined(ONLY64)
psfmt64 = (uint64_t *)&sfmt[0].u[0]; psfmt64 = (uint64_t *)&sfmt[0].u[0];
......
...@@ -55,8 +55,9 @@ public: ...@@ -55,8 +55,9 @@ public:
return val; return val;
} }
float operator[](int i) const { float operator[](int i) const {
int resultBits = _mm_extract_ps(val, i); float result[4];
return *((float*) &resultBits); store(result);
return result[i];
} }
void store(float* v) const { void store(float* v) const {
_mm_storeu_ps(v, val); _mm_storeu_ps(v, val);
...@@ -131,7 +132,9 @@ public: ...@@ -131,7 +132,9 @@ public:
return val; return val;
} }
int operator[](int i) const { int operator[](int i) const {
return _mm_extract_epi32(val, i); int result[4];
store(result);
return result[i];
} }
void store(int* v) const { void store(int* v) const {
_mm_storeu_si128((__m128i*) v, val); _mm_storeu_si128((__m128i*) v, val);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "openmm/State.h" #include "openmm/State.h"
#include "openmm/VirtualSite.h" #include "openmm/VirtualSite.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include <algorithm>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <utility> #include <utility>
...@@ -94,6 +95,8 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ ...@@ -94,6 +95,8 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
vector<string> kernelNames; vector<string> kernelNames;
kernelNames.push_back(CalcForcesAndEnergyKernel::Name()); kernelNames.push_back(CalcForcesAndEnergyKernel::Name());
kernelNames.push_back(UpdateStateDataKernel::Name()); kernelNames.push_back(UpdateStateDataKernel::Name());
kernelNames.push_back(ApplyConstraintsKernel::Name());
kernelNames.push_back(VirtualSitesKernel::Name());
for (int i = 0; i < system.getNumForces(); ++i) { for (int i = 0; i < system.getNumForces(); ++i) {
forceImpls.push_back(system.getForce(i).createImpl()); forceImpls.push_back(system.getForce(i).createImpl());
map<string, double> forceParameters = forceImpls[forceImpls.size()-1]->getDefaultParameters(); map<string, double> forceParameters = forceImpls[forceImpls.size()-1]->getDefaultParameters();
...@@ -104,14 +107,40 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ ...@@ -104,14 +107,40 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
hasInitializedForces = true; hasInitializedForces = true;
vector<string> integratorKernels = integrator.getKernelNames(); vector<string> integratorKernels = integrator.getKernelNames();
kernelNames.insert(kernelNames.begin(), integratorKernels.begin(), integratorKernels.end()); kernelNames.insert(kernelNames.begin(), integratorKernels.begin(), integratorKernels.end());
if (platform == 0)
this->platform = platform = &Platform::findPlatform(kernelNames); // Select a platform to use.
else if (!platform->supportsKernels(kernelNames))
throw OpenMMException("Specified a Platform for a Context which does not support all required kernels"); vector<pair<double, Platform*> > candidatePlatforms;
if (platform == NULL) {
for (int i = 0; i < Platform::getNumPlatforms(); i++) {
Platform& p = Platform::getPlatform(i);
if (p.supportsKernels(kernelNames))
candidatePlatforms.push_back(make_pair(p.getSpeed(), &p));
}
if (candidatePlatforms.size() == 0)
throw OpenMMException("No Platform supports all the requested kernels");
sort(candidatePlatforms.begin(), candidatePlatforms.end());
}
else {
if (!platform->supportsKernels(kernelNames))
throw OpenMMException("Specified a Platform for a Context which does not support all required kernels");
candidatePlatforms.push_back(make_pair(platform->getSpeed(), platform));
}
for (int i = candidatePlatforms.size()-1; i >= 0; i--) {
try {
this->platform = platform = candidatePlatforms[i].second;
platform->contextCreated(*this, properties);
break;
}
catch (...) {
if (i > 0)
continue;
throw;
}
}
// Create and initialize kernels and other objects. // Create and initialize kernels and other objects.
platform->contextCreated(*this, properties);
initializeForcesKernel = platform->createKernel(CalcForcesAndEnergyKernel::Name(), *this); initializeForcesKernel = platform->createKernel(CalcForcesAndEnergyKernel::Name(), *this);
initializeForcesKernel.getAs<CalcForcesAndEnergyKernel>().initialize(system); initializeForcesKernel.getAs<CalcForcesAndEnergyKernel>().initialize(system);
updateStateDataKernel = platform->createKernel(UpdateStateDataKernel::Name(), *this); updateStateDataKernel = platform->createKernel(UpdateStateDataKernel::Name(), *this);
......
...@@ -225,8 +225,8 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() { ...@@ -225,8 +225,8 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
delete[] bonded14IndexArray[i]; delete[] bonded14IndexArray[i];
delete[] bonded14ParamArray[i]; delete[] bonded14ParamArray[i];
} }
delete bonded14IndexArray; delete[] bonded14IndexArray;
delete bonded14ParamArray; delete[] bonded14ParamArray;
} }
if (nonbonded != NULL) if (nonbonded != NULL)
delete nonbonded; delete nonbonded;
......
...@@ -57,7 +57,7 @@ public: ...@@ -57,7 +57,7 @@ public:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
......
...@@ -75,7 +75,7 @@ public: ...@@ -75,7 +75,7 @@ public:
static const int ThreadBlockSize; static const int ThreadBlockSize;
static const int TileSize; static const int TileSize;
CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision, CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, CudaPlatform::PlatformData& platformData); const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData);
~CudaContext(); ~CudaContext();
/** /**
* This is called to initialize internal data structures after all Forces in the system * This is called to initialize internal data structures after all Forces in the system
......
...@@ -95,6 +95,13 @@ public: ...@@ -95,6 +95,13 @@ public:
static const std::string key = "CudaCompiler"; static const std::string key = "CudaCompiler";
return key; return key;
} }
/**
* This is the name of the parameter for specifying the host compiler for the CUDA compiler to use.
*/
static const std::string& CudaHostCompiler() {
static const std::string key = "CudaHostCompiler";
return key;
}
/** /**
* This is the name of the parameter for specifying the path to the directory for creating temporary files. * This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/ */
...@@ -107,7 +114,7 @@ public: ...@@ -107,7 +114,7 @@ public:
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData { class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public: public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty, PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty); const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
void syncContexts(); void syncContexts();
......
...@@ -72,9 +72,12 @@ const int CudaContext::TileSize = sizeof(tileflags)*8; ...@@ -72,9 +72,12 @@ const int CudaContext::TileSize = sizeof(tileflags)*8;
bool CudaContext::hasInitializedCuda = false; bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\"";
if (hostCompiler.size() > 0)
this->compiler = compiler+" --compiler-bindir "+hostCompiler;
if (!hasInitializedCuda) { if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA"); CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
hasInitializedCuda = true; hasInitializedCuda = true;
...@@ -153,9 +156,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -153,9 +156,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device)); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6; int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors; numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
bonded = new CudaBondedUtilities(*this);
nonbonded = new CudaNonbondedUtilities(*this);
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq"); posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(*this, paddedNumAtoms, "velm"); velm = CudaArray::create<double4>(*this, paddedNumAtoms, "velm");
...@@ -166,9 +166,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -166,9 +166,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_double2"; compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3"; compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4"; compilationDefines["make_mixed4"] = "make_double4";
energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq"); posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq");
...@@ -181,9 +178,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -181,9 +178,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_double2"; compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3"; compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4"; compilationDefines["make_mixed4"] = "make_double4";
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
} }
else { else {
posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq"); posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq");
...@@ -194,9 +188,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -194,9 +188,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_float2"; compilationDefines["make_mixed2"] = "make_float2";
compilationDefines["make_mixed3"] = "make_float3"; compilationDefines["make_mixed3"] = "make_float3";
compilationDefines["make_mixed4"] = "make_float4"; compilationDefines["make_mixed4"] = "make_float4";
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
} }
posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0));
...@@ -233,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -233,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// Create utilities objects. // Create utilities objects.
bonded = new CudaBondedUtilities(*this);
nonbonded = new CudaNonbondedUtilities(*this);
integration = new CudaIntegrationUtilities(*this, system); integration = new CudaIntegrationUtilities(*this, system);
expression = new CudaExpressionUtilities(*this); expression = new CudaExpressionUtilities(*this);
} }
...@@ -280,6 +273,22 @@ CudaContext::~CudaContext() { ...@@ -280,6 +273,22 @@ CudaContext::~CudaContext() {
void CudaContext::initialize() { void CudaContext::initialize() {
cuCtxSetCurrent(context); cuCtxSetCurrent(context);
string errorMessage = "Error initializing Context"; string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else if (useMixedPrecision) {
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else {
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
}
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i); double mass = system.getParticleMass(i);
if (useDoublePrecision || useMixedPrecision) if (useDoublePrecision || useMixedPrecision)
...@@ -441,13 +450,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -441,13 +450,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out.close(); out.close();
#ifdef WIN32 #ifdef WIN32
#ifdef _DEBUG #ifdef _DEBUG
string command = "\""+compiler+"\" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile; string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#else #else
string command = "\""+compiler+"\" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile; string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#endif #endif
int res = compileInWindows(command); int res = compileInWindows(command);
#else #else
string command = "\""+compiler+"\" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\""; string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
int res = std::system(command.c_str()); int res = std::system(command.c_str());
#endif #endif
try { try {
......
...@@ -1460,8 +1460,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1460,8 +1460,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq(); CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer(); vector<double4> temp(posq.getSize());
double4* posqd = (double4*) cu.getPinnedBuffer(); float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0)); vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
...@@ -1486,7 +1487,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1486,7 +1487,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusions[i].first].push_back(exclusions[i].second); exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first); exclusionList[exclusions[i].second].push_back(exclusions[i].first);
} }
posq.upload(cu.getPinnedBuffer()); posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
...@@ -2410,8 +2411,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF ...@@ -2410,8 +2411,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cu.addAutoclearBuffer(*bornSum); cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce); cu.addAutoclearBuffer(*bornForce);
CudaArray& posq = cu.getPosq(); CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer(); vector<double4> temp(posq.getSize());
double4* posqd = (double4*) cu.getPinnedBuffer(); float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
vector<float2> paramsVector(cu.getPaddedNumAtoms(), make_float2(1, 1)); vector<float2> paramsVector(cu.getPaddedNumAtoms(), make_float2(1, 1));
const double dielectricOffset = 0.009; const double dielectricOffset = 0.009;
for (int i = 0; i < force.getNumParticles(); i++) { for (int i = 0; i < force.getNumParticles(); i++) {
...@@ -2424,7 +2426,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF ...@@ -2424,7 +2426,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
else else
posqf[i] = make_float4(0, 0, 0, (float) charge); posqf[i] = make_float4(0, 0, 0, (float) charge);
} }
posq.upload(cu.getPinnedBuffer()); posq.upload(&temp[0]);
params->upload(paramsVector); params->upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric())); prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
......
...@@ -90,6 +90,7 @@ CudaPlatform::CudaPlatform() { ...@@ -90,6 +90,7 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaUseCpuPme()); platformProperties.push_back(CudaUseCpuPme());
platformProperties.push_back(CudaCompiler()); platformProperties.push_back(CudaCompiler());
platformProperties.push_back(CudaTempDirectory()); platformProperties.push_back(CudaTempDirectory());
platformProperties.push_back(CudaHostCompiler());
setPropertyDefaultValue(CudaDeviceIndex(), ""); setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), ""); setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true"); setPropertyDefaultValue(CudaUseBlockingSync(), "true");
...@@ -114,6 +115,8 @@ CudaPlatform::CudaPlatform() { ...@@ -114,6 +115,8 @@ CudaPlatform::CudaPlatform() {
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir)); string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(CudaTempDirectory(), tmp); setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif #endif
char* hostCompiler = getenv("CUDA_HOST_COMPILER");
setPropertyDefaultValue(CudaHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
} }
double CudaPlatform::getSpeed() const { double CudaPlatform::getSpeed() const {
...@@ -149,6 +152,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string ...@@ -149,6 +152,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second); getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ? const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second); getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(CudaHostCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower); transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower); transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower); transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
...@@ -156,7 +161,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string ...@@ -156,7 +161,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name()); pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName)) if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false"; cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue)); context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue));
} }
void CudaPlatform::contextDestroyed(ContextImpl& context) const { void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -165,7 +170,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -165,7 +170,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
} }
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) { const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
size_t searchPos = 0, nextPos; size_t searchPos = 0, nextPos;
...@@ -174,15 +179,24 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys ...@@ -174,15 +179,24 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) { try {
if (devices[i].length() > 0) { for (int i = 0; i < (int) devices.size(); i++) {
unsigned int deviceIndex; if (devices[i].length() > 0) {
stringstream(devices[i]) >> deviceIndex; unsigned int deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, *this)); stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
}
} }
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
} }
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, *this));
stringstream deviceIndex, deviceName; stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) { if (i > 0) {
...@@ -202,6 +216,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys ...@@ -202,6 +216,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false"; propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty; propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty; propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
contextEnergy.resize(contexts.size()); contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so. // Determine whether peer-to-peer copying is supported, and enable it if so.
......
...@@ -55,7 +55,8 @@ void testGaussian() { ...@@ -55,7 +55,8 @@ void testGaussian() {
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory())); platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -65,7 +65,8 @@ void verifySorting(vector<float> array) { ...@@ -65,7 +65,8 @@ void verifySorting(vector<float> array) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory())); platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
CudaArray data(context, array.size(), 4, "sortData"); CudaArray data(context, array.size(), 4, "sortData");
......
...@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize); paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
if (useDoublePrecision) { if (useDoublePrecision) {
posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq"); posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm"); velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm");
...@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Create utilities objects. // Create utilities objects.
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
integration = new OpenCLIntegrationUtilities(*this, system); integration = new OpenCLIntegrationUtilities(*this, system);
expression = new OpenCLExpressionUtilities(*this); expression = new OpenCLExpressionUtilities(*this);
} }
......
...@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p ...@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) { try {
if (devices[i].length() > 0) { for (int i = 0; i < (int) devices.size(); i++) {
unsigned int deviceIndex; if (devices[i].length() > 0) {
stringstream(devices[i]) >> deviceIndex; unsigned int deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this)); stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
}
} }
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
} }
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
stringstream deviceIndex, deviceName; stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) { if (i > 0) {
......
#
#
#
""" """
Package simtk.openmm Package simtk.openmm
...@@ -13,25 +9,7 @@ It also tries to load any plugin modules it can find. ...@@ -13,25 +9,7 @@ It also tries to load any plugin modules it can find.
__author__ = "Randall J. Radmer" __author__ = "Randall J. Radmer"
import os, sys, glob, os.path import os, os.path
if sys.platform == "win32":
libPrefix=""
libExt="dll"
elif sys.platform == 'darwin':
libPrefix="lib"
libExt="dylib"
else:
libPrefix="lib"
libExt="so"
# The following is an evil incantation that is needed to permit
# the POSIX "dlopen" function to work. I do not understand
# it. If a better solution is known, please forward to the
# PyOpenMM code maintainers.
import ctypes
flags = sys.getdlopenflags()
sys.setdlopenflags(flags | ctypes.RTLD_GLOBAL)
from simtk.openmm.openmm import * from simtk.openmm.openmm import *
from simtk.openmm.vec3 import Vec3 from simtk.openmm.vec3 import Vec3
from simtk.openmm import version from simtk.openmm import version
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment