Commit b989fd9b authored by peastman's avatar peastman
Browse files

Added option for setting CUDA host compiler

parent ce9e41ab
......@@ -75,7 +75,7 @@ public:
static const int ThreadBlockSize;
static const int TileSize;
CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, CudaPlatform::PlatformData& platformData);
const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData);
~CudaContext();
/**
* This is called to initialize internal data structures after all Forces in the system
......
......@@ -95,6 +95,13 @@ public:
static const std::string key = "CudaCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the host compiler for the CUDA compiler to use.
*/
static const std::string& CudaHostCompiler() {
static const std::string key = "CudaHostCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
......@@ -107,7 +114,7 @@ public:
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty);
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
......
......@@ -72,9 +72,11 @@ const int CudaContext::TileSize = sizeof(tileflags)*8;
bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (hostCompiler.size() > 0)
this->compiler = compiler+" --compiler-bindir "+hostCompiler;
if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
hasInitializedCuda = true;
......
......@@ -90,6 +90,7 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaUseCpuPme());
platformProperties.push_back(CudaCompiler());
platformProperties.push_back(CudaTempDirectory());
platformProperties.push_back(CudaHostCompiler());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
......@@ -114,6 +115,8 @@ CudaPlatform::CudaPlatform() {
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif
char* hostCompiler = getenv("CUDA_HOST_COMPILER");
setPropertyDefaultValue(CudaHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
}
double CudaPlatform::getSpeed() const {
......@@ -149,6 +152,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(CudaHostCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
......@@ -156,7 +161,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -165,7 +170,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
......@@ -178,11 +183,11 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, *this));
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
}
}
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, *this));
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) {
......@@ -202,6 +207,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so.
......
......@@ -55,7 +55,8 @@ void testGaussian() {
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -65,7 +65,8 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()));
CudaContext& context = *platformData.contexts[0];
context.initialize();
CudaArray data(context, array.size(), 4, "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment