Commit 8b44287e authored by Jason Swails's avatar Jason Swails
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents 25580770 291484f2
......@@ -90,8 +90,10 @@ namespace OpenMM_SFMT {
class SFMTData {
public:
/** Raw backing storage for the internal state array; it may be misaligned, so the constructor locates a 16-byte-aligned address inside it */
char baseData[(N+1)*sizeof(w128_t)];
/** the 128-bit internal state array */
w128_t sfmt[N];
w128_t* sfmt;
/** the 32bit integer pointer to the 128-bit internal state array */
uint32_t *psfmt32;
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
......@@ -106,6 +108,9 @@ public:
/** a parity check vector which certifies the period of 2^{MEXP} */
uint32_t parity[4];
SFMTData() {
char* offsetData = baseData+15;
offsetData -= (long long)offsetData&0xF;
sfmt = (w128_t*) offsetData;
psfmt32 = &sfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
psfmt64 = (uint64_t *)&sfmt[0].u[0];
......
......@@ -55,8 +55,9 @@ public:
return val;
}
float operator[](int i) const {
int resultBits = _mm_extract_ps(val, i);
return *((float*) &resultBits);
float result[4];
store(result);
return result[i];
}
void store(float* v) const {
_mm_storeu_ps(v, val);
......@@ -131,7 +132,9 @@ public:
return val;
}
int operator[](int i) const {
return _mm_extract_epi32(val, i);
int result[4];
store(result);
return result[i];
}
void store(int* v) const {
_mm_storeu_si128((__m128i*) v, val);
......
......@@ -39,6 +39,7 @@
#include "openmm/State.h"
#include "openmm/VirtualSite.h"
#include "openmm/Context.h"
#include <algorithm>
#include <iostream>
#include <map>
#include <utility>
......@@ -94,6 +95,8 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
vector<string> kernelNames;
kernelNames.push_back(CalcForcesAndEnergyKernel::Name());
kernelNames.push_back(UpdateStateDataKernel::Name());
kernelNames.push_back(ApplyConstraintsKernel::Name());
kernelNames.push_back(VirtualSitesKernel::Name());
for (int i = 0; i < system.getNumForces(); ++i) {
forceImpls.push_back(system.getForce(i).createImpl());
map<string, double> forceParameters = forceImpls[forceImpls.size()-1]->getDefaultParameters();
......@@ -104,14 +107,40 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
hasInitializedForces = true;
vector<string> integratorKernels = integrator.getKernelNames();
kernelNames.insert(kernelNames.begin(), integratorKernels.begin(), integratorKernels.end());
if (platform == 0)
this->platform = platform = &Platform::findPlatform(kernelNames);
else if (!platform->supportsKernels(kernelNames))
throw OpenMMException("Specified a Platform for a Context which does not support all required kernels");
// Select a platform to use.
vector<pair<double, Platform*> > candidatePlatforms;
if (platform == NULL) {
for (int i = 0; i < Platform::getNumPlatforms(); i++) {
Platform& p = Platform::getPlatform(i);
if (p.supportsKernels(kernelNames))
candidatePlatforms.push_back(make_pair(p.getSpeed(), &p));
}
if (candidatePlatforms.size() == 0)
throw OpenMMException("No Platform supports all the requested kernels");
sort(candidatePlatforms.begin(), candidatePlatforms.end());
}
else {
if (!platform->supportsKernels(kernelNames))
throw OpenMMException("Specified a Platform for a Context which does not support all required kernels");
candidatePlatforms.push_back(make_pair(platform->getSpeed(), platform));
}
for (int i = candidatePlatforms.size()-1; i >= 0; i--) {
try {
this->platform = platform = candidatePlatforms[i].second;
platform->contextCreated(*this, properties);
break;
}
catch (...) {
if (i > 0)
continue;
throw;
}
}
// Create and initialize kernels and other objects.
platform->contextCreated(*this, properties);
initializeForcesKernel = platform->createKernel(CalcForcesAndEnergyKernel::Name(), *this);
initializeForcesKernel.getAs<CalcForcesAndEnergyKernel>().initialize(system);
updateStateDataKernel = platform->createKernel(UpdateStateDataKernel::Name(), *this);
......
......@@ -225,8 +225,8 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
delete[] bonded14IndexArray[i];
delete[] bonded14ParamArray[i];
}
delete bonded14IndexArray;
delete bonded14ParamArray;
delete[] bonded14IndexArray;
delete[] bonded14ParamArray;
}
if (nonbonded != NULL)
delete nonbonded;
......
......@@ -57,7 +57,7 @@ public:
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false) {
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) {
}
CpuNonbondedForce::~CpuNonbondedForce() {
......
......@@ -75,7 +75,7 @@ public:
static const int ThreadBlockSize;
static const int TileSize;
CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, CudaPlatform::PlatformData& platformData);
const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData);
~CudaContext();
/**
* This is called to initialize internal data structures after all Forces in the system
......
......@@ -95,6 +95,13 @@ public:
static const std::string key = "CudaCompiler";
return key;
}
/**
 * Returns the name of the property used to specify which host compiler the CUDA compiler should use.
 */
static const std::string& CudaHostCompiler() {
    // Function-local static: the returned reference refers to the same string object on every call.
    static const std::string propertyName("CudaHostCompiler");
    return propertyName;
}
/**
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
......@@ -107,7 +114,7 @@ public:
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty);
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
......
......@@ -72,9 +72,12 @@ const int CudaContext::TileSize = sizeof(tileflags)*8;
bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\"";
if (hostCompiler.size() > 0)
this->compiler = compiler+" --compiler-bindir "+hostCompiler;
if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
hasInitializedCuda = true;
......@@ -153,9 +156,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
bonded = new CudaBondedUtilities(*this);
nonbonded = new CudaNonbondedUtilities(*this);
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(*this, paddedNumAtoms, "velm");
......@@ -166,9 +166,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4";
energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else if (useMixedPrecision) {
posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq");
......@@ -181,9 +178,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4";
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else {
posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq");
......@@ -194,9 +188,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed2"] = "make_float2";
compilationDefines["make_mixed3"] = "make_float3";
compilationDefines["make_mixed4"] = "make_float4";
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
}
posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0));
......@@ -233,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// Create utilities objects.
bonded = new CudaBondedUtilities(*this);
nonbonded = new CudaNonbondedUtilities(*this);
integration = new CudaIntegrationUtilities(*this, system);
expression = new CudaExpressionUtilities(*this);
}
......@@ -280,6 +273,22 @@ CudaContext::~CudaContext() {
void CudaContext::initialize() {
cuCtxSetCurrent(context);
string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else if (useMixedPrecision) {
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else {
energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
}
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
if (useDoublePrecision || useMixedPrecision)
......@@ -441,13 +450,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out.close();
#ifdef WIN32
#ifdef _DEBUG
string command = "\""+compiler+"\" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#else
string command = "\""+compiler+"\" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#endif
int res = compileInWindows(command);
#else
string command = "\""+compiler+"\" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
int res = std::system(command.c_str());
#endif
try {
......
......@@ -1460,8 +1460,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
......@@ -1486,7 +1487,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
posq.upload(cu.getPinnedBuffer());
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
......@@ -2410,8 +2411,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce);
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
vector<float2> paramsVector(cu.getPaddedNumAtoms(), make_float2(1, 1));
const double dielectricOffset = 0.009;
for (int i = 0; i < force.getNumParticles(); i++) {
......@@ -2424,7 +2426,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
}
posq.upload(cu.getPinnedBuffer());
posq.upload(&temp[0]);
params->upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
......
......@@ -90,6 +90,7 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaUseCpuPme());
platformProperties.push_back(CudaCompiler());
platformProperties.push_back(CudaTempDirectory());
platformProperties.push_back(CudaHostCompiler());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
......@@ -114,6 +115,8 @@ CudaPlatform::CudaPlatform() {
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif
char* hostCompiler = getenv("CUDA_HOST_COMPILER");
setPropertyDefaultValue(CudaHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
}
double CudaPlatform::getSpeed() const {
......@@ -149,6 +152,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(CudaHostCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
......@@ -156,7 +161,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -165,7 +170,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
......@@ -174,15 +179,24 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
searchPos = nextPos+1;
}
devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, *this));
try {
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
}
}
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
}
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, *this));
stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) {
......@@ -202,6 +216,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so.
......
......@@ -55,7 +55,8 @@ void testGaussian() {
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -65,7 +65,8 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
CudaArray data(context, array.size(), 4, "sortData");
......
......@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
if (useDoublePrecision) {
posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm");
......@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Create utilities objects.
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
integration = new OpenCLIntegrationUtilities(*this, system);
expression = new OpenCLExpressionUtilities(*this);
}
......
......@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1;
}
devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
try {
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
}
}
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
}
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) {
......
#
#
#
"""
Package simtk.openmm
......@@ -13,25 +9,7 @@ It also tries to load any plugin modules it can find.
__author__ = "Randall J. Radmer"
import os, sys, glob, os.path
if sys.platform == "win32":
libPrefix=""
libExt="dll"
elif sys.platform == 'darwin':
libPrefix="lib"
libExt="dylib"
else:
libPrefix="lib"
libExt="so"
# The following is an evil incantation that is needed to permit
# the POSIX "dlopen" function to work. I do not understand
# it. If a better solution is known, please forward to the
# PyOpenMM code maintainers.
import ctypes
flags = sys.getdlopenflags()
sys.setdlopenflags(flags | ctypes.RTLD_GLOBAL)
import os, os.path
from simtk.openmm.openmm import *
from simtk.openmm.vec3 import Vec3
from simtk.openmm import version
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment