Unverified Commit b9c45d45 authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Always use hipRTC, support Windows

* Unload all loaded modules in HipContext's destructor,
  HIP modules keep file desctriptors opened, but OpenMM never unloads
  modules leaking these file descriptors. This can cause crashinf of
  some scripts like test-openmm-platforms from openmmtools.
* ROCm 6.0 defines operator* for complex types (that are typedefs for
  float2 and double2), they conflict with operators defined for vectors.
  This is fixed in newer ROCm versions.
* Revert HIP_DYNAMIC_SHARED back to extern __shared__ (the macro is
  in the headers).
* Reduce the speed of the HIP platform if there are no HIP devices in
  the system.
parent a0acfbc9
......@@ -4,7 +4,9 @@
# Creates OpenMMHIP library.
#
# Windows:
# unsupported
# OpenMMHIP.dll
# OpenMMHIP.lib
# OpenMMHIP_static.lib
# Unix:
# libOpenMMHIP.so
# libOpenMMHIP_static.a
......
......@@ -43,7 +43,6 @@
#include <utility>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
// Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX
#endif
......@@ -85,8 +84,7 @@ public:
static const int ThreadBlockSize;
static const int TileSize;
HipContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, HipPlatform::PlatformData& platformData,
HipContext* originalContext);
const std::string& tempDir, HipPlatform::PlatformData& platformData, HipContext* originalContext);
~HipContext();
/**
* This is called to initialize internal data structures after all Forces in the system
......@@ -255,6 +253,18 @@ public:
HipArray& getAtomIndexArray() {
return atomIndexDevice;
}
/**
* Get a file name in tempDir unique for the current process and context.
*/
std::string getTempFileName() const;
/**
* Get src hash.
*/
std::string getHash(const std::string& src) const;
/**
* Get a filename in cacheDir based on src hash.
*/
std::string getCacheFileName(const std::string& src) const;
/**
* Create a HIP module from source code.
*
......@@ -555,6 +565,10 @@ public:
* expense of reduced simulation performance.
*/
void flushQueue();
/**
* Get the flags that should be used when allocating pinned host memory.
*/
unsigned int getHostMallocFlags();
private:
/**
* Compute a sorted list of device indices in decreasing order of desirability
......@@ -571,12 +585,13 @@ private:
int multiprocessors;
int sharedMemPerBlock;
bool supportsHardwareFloatGlobalAtomicAdd;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isHipccAvailable, hasAssignedPosqCharges;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasAssignedPosqCharges;
bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture;
std::string tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules;
hipDevice_t device;
hipStream_t currentStream;
hipFunction_t clearBufferKernel;
......@@ -605,7 +620,6 @@ private:
HipExpressionUtilities* expression;
HipBondedUtilities* bonded;
HipNonbondedUtilities* nonbonded;
Kernel compilerKernel;
};
/**
......
......@@ -40,27 +40,6 @@
namespace OpenMM {
/**
* This abstract class defines an interface for code that can compile CUDA kernels. This allows a plugin to take advantage of runtime compilation
* when running on recent versions of CUDA.
*/
class HipCompilerKernel : public KernelImpl {
public:
static std::string Name() {
return "HipCompilerKernel";
}
HipCompilerKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
}
/**
* Compile a kernel to PTX.
*
* @param source the source code for the kernel
* @param options the flags to be passed to the compiler
* @param cu the HipContext for which the kernel is being compiled
*/
virtual std::string createModule(const std::string& source, const std::string& flags, HipContext& cu) = 0;
};
/**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any
......
......@@ -91,20 +91,6 @@ public:
static const std::string key = "UseCpuPme";
return key;
}
/**
* This is the name of the parameter for specifying the path to the HIP compiler.
*/
static const std::string& HipCompiler() {
static const std::string key = "HipCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the host compiler for the HIP compiler to use.
*/
static const std::string& HipHostCompiler() {
static const std::string key = "HipHostCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
......@@ -131,7 +117,7 @@ public:
class OPENMM_EXPORT_COMMON HipPlatform::PlatformData {
public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty,
const std::string& cpuPmeProperty, const std::string& tempProperty,
const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext);
~PlatformData();
void initializeContexts(const System& system);
......
......@@ -26,9 +26,8 @@
* -------------------------------------------------------------------------- */
#ifdef WIN32
#error "Windows unsupported for HIP platform"
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include <cmath>
#include "HipContext.h"
#include "HipArray.h"
#include "HipBondedUtilities.h"
......@@ -46,15 +45,18 @@
#include "HipExpressionUtilities.h"
#include "openmm/internal/ContextImpl.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <sstream>
#include <stack>
#include <thread>
#include <typeinfo>
#include <sys/stat.h>
#include <unistd.h>
#include <hip/hiprtc.h>
#define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
......@@ -65,6 +67,13 @@
throw OpenMMException(m.str());\
}
#define HIPRTC_CHECK_RESULT(result, prefix) \
if (result != HIPRTC_SUCCESS) { \
stringstream m; \
m<<prefix<<": "<<hiprtcGetErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
}
using namespace OpenMM;
using namespace std;
......@@ -73,27 +82,10 @@ const int HipContext::TileSize = 32;
bool HipContext::hasInitializedHip = false;
HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, HipPlatform::PlatformData& platformData, HipContext* originalContext) : ComputeContext(system), currentStream(0),
platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false),
hasCompilerKernel(false), isHipccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& tempDir, HipPlatform::PlatformData& platformData,
HipContext* originalContext) : ComputeContext(system), currentStream(0), platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false),
pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
supportsHardwareFloatGlobalAtomicAdd(false) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) {
try {
compilerKernel = platformData.context->getPlatform().createKernel(HipCompilerKernel::Name(), *platformData.context);
hasCompilerKernel = true;
}
catch (...) {
// The runtime compiler plugin isn't available.
}
}
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str());
struct stat info;
isHipccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
if (!hasInitializedHip) {
CHECK_RESULT2(hipInit(0), "Error initializing HIP");
hasInitializedHip = true;
......@@ -114,8 +106,13 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
throw OpenMMException("Illegal value for Precision: "+precision);
char* cacheVariable = getenv("OPENMM_CACHE_DIR");
cacheDir = (cacheVariable == NULL ? tempDir : string(cacheVariable));
#ifdef WIN32
this->tempDir = tempDir+"\\";
cacheDir = cacheDir+"\\";
#else
this->tempDir = tempDir+"/";
cacheDir = cacheDir+"/";
#endif
contextIndex = platformData.contexts.size();
string errorMessage = "Error initializing Context";
if (originalContext == NULL) {
......@@ -366,6 +363,8 @@ HipContext::~HipContext() {
delete bonded;
if (nonbonded != NULL)
delete nonbonded;
for (auto module : loadedModules)
hipModuleUnload(module);
contextIsValid = false;
}
......@@ -377,19 +376,19 @@ void HipContext::initialize() {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
}
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
......@@ -423,15 +422,46 @@ void HipContext::setAsCurrent() {
hipSetDevice(device);
}
string HipContext::getTempFileName() const {
stringstream tempFileName;
tempFileName << tempDir;
tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
tempFileName << "_" << std::this_thread::get_id();
return tempFileName.str();
}
string HipContext::getHash(const string& src) const {
CSHA1 sha1;
sha1.Update((const UINT_8*) src.c_str(), src.size());
sha1.Final();
UINT_8 hash[20];
sha1.GetHash(hash);
stringstream cacheHash;
cacheHash.flags(ios::hex);
for (int i = 0; i < 20; i++)
cacheHash << setw(2) << setfill('0') << (int) hash[i];
return cacheHash.str();
}
string HipContext::getCacheFileName(const string& src) const {
stringstream cacheFile;
cacheFile << cacheDir << "openmm-hip-" << getHash(src + gpuArchitecture);
return cacheFile.str();
}
hipModule_t HipContext::createModule(const string source) {
return createModule(source, map<string, string>());
}
hipModule_t HipContext::createModule(const string source, const map<string, string>& defines) {
const char* saveTempsEnv = getenv("OPENMM_SAVE_TEMPS");
bool saveTemps = saveTempsEnv != nullptr;
string bits = intToString(8*sizeof(void*));
string options = "-ffast-math -munsafe-fp-atomics -Wall";
const bool saveTemps = saveTempsEnv != nullptr && string(saveTempsEnv) == "1";
int runtimeVersion;
CHECK_RESULT2(hipRuntimeGetVersion(&runtimeVersion), "Error getting HIP runtime version");
string options = "-O3 -ffast-math -munsafe-fp-atomics -Wall -Wno-hip-only";
options += " --offload-arch=" + gpuArchitecture;
if (gpuArchitecture.find("gfx90a") == 0 ||
gpuArchitecture.find("gfx94") == 0) {
// HIP-TODO: Remove it when the compiler does a better job
......@@ -442,9 +472,15 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
if (getMaxThreadBlockSize() < 1024) {
options += " --gpu-max-threads-per-block=" + std::to_string(getMaxThreadBlockSize());
}
if (runtimeVersion < 60140092) {
// Workaround for operator* defined for complex types (typedefs for float2, double2) in
// ROCm 6.0 headers. This issue has been fixed in 6.1. hipRTC includes amd_hip_complex.h
// by default, we fool the include guard into thinking the header is already included.
options += " -D HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H";
}
stringstream src;
if (!options.empty())
src << "// Compilation Options: " << options << endl << endl;
src << "// Compilation Options: " << options << endl << endl;
src << "// HIP Runtime Version: " << runtimeVersion << endl << endl;
for (auto& pair : compilationDefines) {
// Query defines to avoid duplicate variables
if (defines.find(pair.first) == defines.end()) {
......@@ -457,11 +493,6 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
if (!compilationDefines.empty())
src << endl;
// include the main header for built-in variables (threadIdx etc.) and functions
src << "#include \"hip/hip_runtime.h\"\n";
// include the vector types
src << "#include \"hip/hip_vector_types.h\"\n";
if (useDoublePrecision) {
src << "typedef double real;\n";
src << "typedef double2 real2;\n";
......@@ -501,113 +532,88 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
// See whether we already have PTX for this kernel cached.
CSHA1 sha1;
sha1.Update((const UINT_8*) src.str().c_str(), src.str().size());
sha1.Final();
UINT_8 hash[20];
sha1.GetHash(hash);
stringstream cacheHash;
cacheHash.flags(ios::hex);
for (int i = 0; i < 20; i++)
cacheHash << setw(2) << setfill('0') << (int) hash[i];
stringstream cacheFile;
cacheFile << cacheDir << cacheHash.str() << '_' << gpuArchitecture << '_' << bits;
string cacheFile = getCacheFileName(src.str());
hipModule_t module;
if (hipModuleLoad(&module, cacheFile.str().c_str()) == hipSuccess)
if (hipModuleLoad(&module, cacheFile.c_str()) == hipSuccess) {
loadedModules.push_back(module);
return module;
}
// Select names for the various temporary files.
stringstream tempFileName;
if (saveTemps) {
tempFileName << saveTempsEnv;
stringstream tempFileName;
const char* saveTempsPrefixEnv = getenv("OPENMM_SAVE_TEMPS_PREFIX");
if (saveTempsPrefixEnv) {
tempFileName << saveTempsPrefixEnv;
}
tempFileName << cacheHash.str();
}
else {
tempFileName << tempDir;
tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
tempFileName << "_" << getpid();
}
string inputFile = (tempFileName.str()+".hip.cpp");
string outputFile = (tempFileName.str()+".hsaco");
string logFile = (tempFileName.str()+".log");
int res = 0;
// If the runtime compiler plugin is available, use it.
if (hasCompilerKernel) {
string ptx = compilerKernel.getAs<HipCompilerKernel>().createModule(src.str(), options, *this);
// If possible, write the PTX out to a temporary file so we can cache it for later use.
bool wroteCache = false;
try {
ofstream out(outputFile.c_str());
out << ptx;
out.close();
if (!out.fail())
wroteCache = true;
}
catch (...) {
// Ignore.
}
if (!wroteCache) {
// An error occurred. Possibly we don't have permission to write to the temp directory. Just try to load the module directly.
tempFileName << getHash(src.str());
CHECK_RESULT2(hipModuleLoadDataEx(&module, &ptx[0], 0, NULL, NULL), "Error loading HIP module");
return module;
}
}
else {
// Write out the source to a temporary file.
options += " --save-temps";
string inputFile = (tempFileName.str()+".hip");
std::cout << "Source code: " << inputFile << std::endl;
std::cout << "Compile options: " << options << std::endl;
ofstream out(inputFile.c_str());
out << src.str();
out.close();
string command = compiler + " --genco --amdgpu-target=" + gpuArchitecture + " " + options + (saveTemps ? " -save-temps=obj" : "") +" -o \""+outputFile+"\" " + " \""+inputFile+"\" 2> \""+logFile+"\"";
res = std::system(command.c_str());
}
// Split the command line options into an array of options.
stringstream flagsStream(options);
string flag;
vector<string> splitFlags;
while (flagsStream >> flag)
splitFlags.push_back(flag);
int numOptions = splitFlags.size();
vector<const char*> optionsVec(numOptions);
for (int i = 0; i < numOptions; i++)
optionsVec[i] = &splitFlags[i][0];
// Compile the program to CO.
hiprtcProgram program;
HIPRTC_CHECK_RESULT(hiprtcCreateProgram(&program, src.str().c_str(), NULL, 0, NULL, NULL), "Error creating program");
try {
if (res != 0) {
// Load the error log.
stringstream error;
error << "Error launching HIP compiler: " << res;
ifstream log(logFile.c_str());
if (log.is_open()) {
string line;
while (!log.eof()) {
getline(log, line);
error << '\n' << line;
hiprtcResult result = hiprtcCompileProgram(program, optionsVec.size(), &optionsVec[0]);
if (result != HIPRTC_SUCCESS || saveTemps) {
size_t logSize;
hiprtcGetProgramLogSize(program, &logSize);
std::string log(logSize, '\0');
if (logSize > 0) {
hiprtcGetProgramLog(program, &log[0]);
if (saveTemps) {
std::cout << "Log: " << log << std::endl;
}
log.close();
}
throw OpenMMException(error.str());
if (result != HIPRTC_SUCCESS) {
throw OpenMMException("Error compiling program: "+log);
}
}
hipError_t result = hipModuleLoad(&module, outputFile.c_str());
if (result != hipSuccess) {
std::stringstream m;
m<<"Error loading HIP module: "<<getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
size_t codeSize;
hiprtcGetCodeSize(program, &codeSize);
vector<char> code(codeSize);
hiprtcGetCode(program, &code[0]);
hiprtcDestroyProgram(&program);
// If possible, write the CO out to a cache file for later use.
try {
ofstream out(cacheFile.c_str(), ios::out | ios::binary);
out.write(&code[0], code.size());
out.close();
}
if (!saveTemps) {
remove(inputFile.c_str());
remove(logFile.c_str());
catch (...) {
// An error occurred. Possibly we don't have permission to write to the temp directory.
// Ignore.
}
if (rename(outputFile.c_str(), cacheFile.str().c_str()) != 0 && !saveTemps)
remove(outputFile.c_str());
CHECK_RESULT2(hipModuleLoadDataEx(&module, &code[0], 0, NULL, NULL), "Error loading HIP module");
loadedModules.push_back(module);
return module;
}
catch (...) {
if (!saveTemps) {
remove(inputFile.c_str());
remove(outputFile.c_str());
remove(logFile.c_str());
}
hiprtcDestroyProgram(&program);
throw;
}
}
......@@ -834,3 +840,11 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence;
}
unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32
return hipHostMallocDefault;
#else
return hipHostMallocNumaUser;
#endif
}
......@@ -42,7 +42,7 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), hipHostMallocMapped), "Error allocating pinned memory");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
}
......
......@@ -28,9 +28,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
#endif
#include "openmm/common/windowsExportCommon.h"
#include <string>
......
......@@ -70,7 +70,7 @@ HipNonbondedUtilities::HipNonbondedUtilities(HipContext& context) : context(cont
string errorMessage = "Error initializing nonbonded utilities";
CHECK_RESULT(hipEventCreateWithFlags(&downloadCountEvent, context.getEventFlags()));
CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), hipHostMallocPortable));
CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), context.getHostMallocFlags()));
numForceThreadBlocks = 5*4*context.getMultiprocessors();
forceThreadBlockSize = 64;
findInteractingBlocksThreadBlockSize = context.getSIMDWidth();
......
......@@ -43,7 +43,6 @@ if (result != hipSuccess) { \
* Get the current clock time, measured in microseconds.
*/
#ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
#include <Windows.h>
static long long getTime() {
FILETIME ft;
......
......@@ -39,7 +39,7 @@
#include <sstream>
#include <cstdio>
#ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
#include <Windows.h>
#endif
using namespace OpenMM;
using namespace std;
......@@ -112,9 +112,7 @@ HipPlatform::HipPlatform() {
platformProperties.push_back(HipUseBlockingSync());
platformProperties.push_back(HipPrecision());
platformProperties.push_back(HipUseCpuPme());
platformProperties.push_back(HipCompiler());
platformProperties.push_back(HipTempDirectory());
platformProperties.push_back(HipHostCompiler());
platformProperties.push_back(HipDisablePmeStream());
platformProperties.push_back(HipDeterministicForces());
setPropertyDefaultValue(HipDeviceIndex(), "");
......@@ -124,26 +122,20 @@ HipPlatform::HipPlatform() {
setPropertyDefaultValue(HipUseCpuPme(), "false");
setPropertyDefaultValue(HipDisablePmeStream(), "false");
setPropertyDefaultValue(HipDeterministicForces(), "false");
char* compiler = getenv("OPENMM_HIP_COMPILER");
char* rocm_path = getenv("ROCM_PATH");
string hipcc;
if (rocm_path != NULL) {
hipcc = string(rocm_path) + "/bin/hipcc";
} else if (compiler != NULL) {
hipcc = compiler;
} else {
hipcc = "/opt/rocm/bin/hipcc";
}
setPropertyDefaultValue(HipCompiler(), hipcc);
#ifdef _MSC_VER
setPropertyDefaultValue(HipTempDirectory(), string(getenv("TEMP")));
#else
char* tmpdir = getenv("TMPDIR");
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(HipTempDirectory(), tmp);
char* hostCompiler = getenv("HIP_HOST_COMPILER");
setPropertyDefaultValue(HipHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
#endif
}
double HipPlatform::getSpeed() const {
return 100;
// Reduce the speed of the HIP platform if there are no HIP devices in the system,
// so the OpenCL plaform can be selected as default
int numDevices;
return hipGetDeviceCount(&numDevices) != hipErrorNoDevice ? 100 : 40;
}
bool HipPlatform::supportsDoublePrecision() const {
......@@ -174,12 +166,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string>
getPropertyDefaultValue(HipPrecision()) : properties.find(HipPrecision())->second);
string cpuPmePropValue = (properties.find(HipUseCpuPme()) == properties.end() ?
getPropertyDefaultValue(HipUseCpuPme()) : properties.find(HipUseCpuPme())->second);
const string& compilerPropValue = (properties.find(HipCompiler()) == properties.end() ?
getPropertyDefaultValue(HipCompiler()) : properties.find(HipCompiler())->second);
const string& tempPropValue = (properties.find(HipTempDirectory()) == properties.end() ?
getPropertyDefaultValue(HipTempDirectory()) : properties.find(HipTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(HipHostCompiler()) == properties.end() ?
getPropertyDefaultValue(HipHostCompiler()) : properties.find(HipHostCompiler())->second);
string pmeStreamPropValue = (properties.find(HipDisablePmeStream()) == properties.end() ?
getPropertyDefaultValue(HipDisablePmeStream()) : properties.find(HipDisablePmeStream())->second);
string deterministicForcesValue = (properties.find(HipDeterministicForces()) == properties.end() ?
......@@ -197,8 +185,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string>
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, NULL));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
pmeStreamPropValue, deterministicForcesValue, threads, NULL));
}
void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
......@@ -207,14 +195,12 @@ void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& origin
string blockingPropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseBlockingSync());
string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), HipPrecision());
string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseCpuPme());
string compilerPropValue = platform.getPropertyValue(originalContext.getOwner(), HipCompiler());
string tempPropValue = platform.getPropertyValue(originalContext.getOwner(), HipTempDirectory());
string hostCompilerPropValue = platform.getPropertyValue(originalContext.getOwner(), HipHostCompiler());
string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), HipDisablePmeStream());
string deterministicForcesValue = platform.getPropertyValue(originalContext.getOwner(), HipDeterministicForces());
int threads = reinterpret_cast<PlatformData*>(originalContext.getPlatformData())->threads.getNumThreads();
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, &originalContext));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
pmeStreamPropValue, deterministicForcesValue, threads, &originalContext));
}
void HipPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -223,9 +209,10 @@ void HipPlatform::contextDestroyed(ContextImpl& context) const {
}
HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
const string& cpuPmeProperty, const string& tempProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false),
threads(numThreads) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
......@@ -242,11 +229,11 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst
if (devices[i].length() > 0) {
int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new HipContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
contexts.push_back(new HipContext(system, deviceIndex, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
}
}
if (contexts.size() == 0)
contexts.push_back(new HipContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
contexts.push_back(new HipContext(system, -1, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
......@@ -275,9 +262,7 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst
propertyValues[HipPlatform::HipUseBlockingSync()] = blocking ? "true" : "false";
propertyValues[HipPlatform::HipPrecision()] = precisionProperty;
propertyValues[HipPlatform::HipUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[HipPlatform::HipCompiler()] = compilerProperty;
propertyValues[HipPlatform::HipTempDirectory()] = tempProperty;
propertyValues[HipPlatform::HipHostCompiler()] = hostCompilerProperty;
propertyValues[HipPlatform::HipDisablePmeStream()] = disablePmeStream ? "true" : "false";
propertyValues[HipPlatform::HipDeterministicForces()] = deterministicForces ? "true" : "false";
contextEnergy.resize(contexts.size());
......
......@@ -77,7 +77,7 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
* Sum the energy buffer.
*/
__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
HIP_DYNAMIC_SHARED( mixed, tempBuffer)
extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
......
......@@ -56,8 +56,8 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system;
system.addParticle(0.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
OpenMM_SFMT::SFMT sfmt;
......
......@@ -56,8 +56,8 @@ void testGaussian() {
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -66,8 +66,8 @@ void verifySorting(vector<float> array, bool uniform) {
System system;
system.addParticle(0.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
HipArray data(context, array.size(), 4, "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment