Unverified Commit b9c45d45 authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Always use hipRTC, support Windows

* Unload all loaded modules in HipContext's destructor,
  HIP modules keep file desctriptors opened, but OpenMM never unloads
  modules leaking these file descriptors. This can cause crashinf of
  some scripts like test-openmm-platforms from openmmtools.
* ROCm 6.0 defines operator* for complex types (that are typedefs for
  float2 and double2), they conflict with operators defined for vectors.
  This is fixed in newer ROCm versions.
* Revert HIP_DYNAMIC_SHARED back to extern __shared__ (the macro is
  in the headers).
* Reduce the speed of the HIP platform if there are no HIP devices in
  the system.
parent a0acfbc9
...@@ -4,7 +4,9 @@ ...@@ -4,7 +4,9 @@
# Creates OpenMMHIP library. # Creates OpenMMHIP library.
# #
# Windows: # Windows:
# unsupported # OpenMMHIP.dll
# OpenMMHIP.lib
# OpenMMHIP_static.lib
# Unix: # Unix:
# libOpenMMHIP.so # libOpenMMHIP.so
# libOpenMMHIP_static.a # libOpenMMHIP_static.a
......
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include <utility> #include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER #ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
// Prevent Windows from defining macros that interfere with other code. // Prevent Windows from defining macros that interfere with other code.
#define NOMINMAX #define NOMINMAX
#endif #endif
...@@ -85,8 +84,7 @@ public: ...@@ -85,8 +84,7 @@ public:
static const int ThreadBlockSize; static const int ThreadBlockSize;
static const int TileSize; static const int TileSize;
HipContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision, HipContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, HipPlatform::PlatformData& platformData, const std::string& tempDir, HipPlatform::PlatformData& platformData, HipContext* originalContext);
HipContext* originalContext);
~HipContext(); ~HipContext();
/** /**
* This is called to initialize internal data structures after all Forces in the system * This is called to initialize internal data structures after all Forces in the system
...@@ -255,6 +253,18 @@ public: ...@@ -255,6 +253,18 @@ public:
HipArray& getAtomIndexArray() { HipArray& getAtomIndexArray() {
return atomIndexDevice; return atomIndexDevice;
} }
/**
* Get a file name in tempDir unique for the current process and context.
*/
std::string getTempFileName() const;
/**
* Get src hash.
*/
std::string getHash(const std::string& src) const;
/**
* Get a filename in cacheDir based on src hash.
*/
std::string getCacheFileName(const std::string& src) const;
/** /**
* Create a HIP module from source code. * Create a HIP module from source code.
* *
...@@ -555,6 +565,10 @@ public: ...@@ -555,6 +565,10 @@ public:
* expense of reduced simulation performance. * expense of reduced simulation performance.
*/ */
void flushQueue(); void flushQueue();
/**
* Get the flags that should be used when allocating pinned host memory.
*/
unsigned int getHostMallocFlags();
private: private:
/** /**
* Compute a sorted list of device indices in decreasing order of desirability * Compute a sorted list of device indices in decreasing order of desirability
...@@ -571,12 +585,13 @@ private: ...@@ -571,12 +585,13 @@ private:
int multiprocessors; int multiprocessors;
int sharedMemPerBlock; int sharedMemPerBlock;
bool supportsHardwareFloatGlobalAtomicAdd; bool supportsHardwareFloatGlobalAtomicAdd;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isHipccAvailable, hasAssignedPosqCharges; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasAssignedPosqCharges;
bool isLinkedContext; bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules;
hipDevice_t device; hipDevice_t device;
hipStream_t currentStream; hipStream_t currentStream;
hipFunction_t clearBufferKernel; hipFunction_t clearBufferKernel;
...@@ -605,7 +620,6 @@ private: ...@@ -605,7 +620,6 @@ private:
HipExpressionUtilities* expression; HipExpressionUtilities* expression;
HipBondedUtilities* bonded; HipBondedUtilities* bonded;
HipNonbondedUtilities* nonbonded; HipNonbondedUtilities* nonbonded;
Kernel compilerKernel;
}; };
/** /**
......
...@@ -40,27 +40,6 @@ ...@@ -40,27 +40,6 @@
namespace OpenMM { namespace OpenMM {
/**
* This abstract class defines an interface for code that can compile CUDA kernels. This allows a plugin to take advantage of runtime compilation
* when running on recent versions of CUDA.
*/
class HipCompilerKernel : public KernelImpl {
public:
static std::string Name() {
return "HipCompilerKernel";
}
HipCompilerKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
}
/**
* Compile a kernel to PTX.
*
* @param source the source code for the kernel
* @param options the flags to be passed to the compiler
* @param cu the HipContext for which the kernel is being compiled
*/
virtual std::string createModule(const std::string& source, const std::string& flags, HipContext& cu) = 0;
};
/** /**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the * This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any * Platform a chance to clear buffers and do other initialization at the beginning, and to do any
......
...@@ -91,20 +91,6 @@ public: ...@@ -91,20 +91,6 @@ public:
static const std::string key = "UseCpuPme"; static const std::string key = "UseCpuPme";
return key; return key;
} }
/**
* This is the name of the parameter for specifying the path to the HIP compiler.
*/
static const std::string& HipCompiler() {
static const std::string key = "HipCompiler";
return key;
}
/**
* This is the name of the parameter for specifying the host compiler for the HIP compiler to use.
*/
static const std::string& HipHostCompiler() {
static const std::string key = "HipHostCompiler";
return key;
}
/** /**
* This is the name of the parameter for specifying the path to the directory for creating temporary files. * This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/ */
...@@ -131,7 +117,7 @@ public: ...@@ -131,7 +117,7 @@ public:
class OPENMM_EXPORT_COMMON HipPlatform::PlatformData { class OPENMM_EXPORT_COMMON HipPlatform::PlatformData {
public: public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty, PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty, const std::string& cpuPmeProperty, const std::string& tempProperty,
const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext); const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
......
...@@ -26,9 +26,8 @@ ...@@ -26,9 +26,8 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#ifdef WIN32 #ifdef WIN32
#error "Windows unsupported for HIP platform" #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#include <cmath>
#include "HipContext.h" #include "HipContext.h"
#include "HipArray.h" #include "HipArray.h"
#include "HipBondedUtilities.h" #include "HipBondedUtilities.h"
...@@ -46,15 +45,18 @@ ...@@ -46,15 +45,18 @@
#include "HipExpressionUtilities.h" #include "HipExpressionUtilities.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include <algorithm> #include <algorithm>
#include <cmath>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <stack>
#include <thread>
#include <typeinfo> #include <typeinfo>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <hip/hiprtc.h>
#define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage); #define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
...@@ -65,6 +67,13 @@ ...@@ -65,6 +67,13 @@
throw OpenMMException(m.str());\ throw OpenMMException(m.str());\
} }
#define HIPRTC_CHECK_RESULT(result, prefix) \
if (result != HIPRTC_SUCCESS) { \
stringstream m; \
m<<prefix<<": "<<hiprtcGetErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
}
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -73,27 +82,10 @@ const int HipContext::TileSize = 32; ...@@ -73,27 +82,10 @@ const int HipContext::TileSize = 32;
bool HipContext::hasInitializedHip = false; bool HipContext::hasInitializedHip = false;
HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& tempDir, HipPlatform::PlatformData& platformData,
const string& tempDir, const std::string& hostCompiler, HipPlatform::PlatformData& platformData, HipContext* originalContext) : ComputeContext(system), currentStream(0), HipContext* originalContext) : ComputeContext(system), currentStream(0), platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false),
platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
hasCompilerKernel(false), isHipccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
supportsHardwareFloatGlobalAtomicAdd(false) { supportsHardwareFloatGlobalAtomicAdd(false) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) {
try {
compilerKernel = platformData.context->getPlatform().createKernel(HipCompilerKernel::Name(), *platformData.context);
hasCompilerKernel = true;
}
catch (...) {
// The runtime compiler plugin isn't available.
}
}
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str());
struct stat info;
isHipccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
if (!hasInitializedHip) { if (!hasInitializedHip) {
CHECK_RESULT2(hipInit(0), "Error initializing HIP"); CHECK_RESULT2(hipInit(0), "Error initializing HIP");
hasInitializedHip = true; hasInitializedHip = true;
...@@ -114,8 +106,13 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -114,8 +106,13 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
throw OpenMMException("Illegal value for Precision: "+precision); throw OpenMMException("Illegal value for Precision: "+precision);
char* cacheVariable = getenv("OPENMM_CACHE_DIR"); char* cacheVariable = getenv("OPENMM_CACHE_DIR");
cacheDir = (cacheVariable == NULL ? tempDir : string(cacheVariable)); cacheDir = (cacheVariable == NULL ? tempDir : string(cacheVariable));
#ifdef WIN32
this->tempDir = tempDir+"\\";
cacheDir = cacheDir+"\\";
#else
this->tempDir = tempDir+"/"; this->tempDir = tempDir+"/";
cacheDir = cacheDir+"/"; cacheDir = cacheDir+"/";
#endif
contextIndex = platformData.contexts.size(); contextIndex = platformData.contexts.size();
string errorMessage = "Error initializing Context"; string errorMessage = "Error initializing Context";
if (originalContext == NULL) { if (originalContext == NULL) {
...@@ -366,6 +363,8 @@ HipContext::~HipContext() { ...@@ -366,6 +363,8 @@ HipContext::~HipContext() {
delete bonded; delete bonded;
if (nonbonded != NULL) if (nonbonded != NULL)
delete nonbonded; delete nonbonded;
for (auto module : loadedModules)
hipModuleUnload(module);
contextIsValid = false; contextIsValid = false;
} }
...@@ -377,19 +376,19 @@ void HipContext::initialize() { ...@@ -377,19 +376,19 @@ void HipContext::initialize() {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0)); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0)); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else { else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum"); energySum.initialize<float>(*this, 1, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0)); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
} }
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i); double mass = system.getParticleMass(i);
...@@ -423,15 +422,46 @@ void HipContext::setAsCurrent() { ...@@ -423,15 +422,46 @@ void HipContext::setAsCurrent() {
hipSetDevice(device); hipSetDevice(device);
} }
string HipContext::getTempFileName() const {
stringstream tempFileName;
tempFileName << tempDir;
tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
tempFileName << "_" << std::this_thread::get_id();
return tempFileName.str();
}
string HipContext::getHash(const string& src) const {
CSHA1 sha1;
sha1.Update((const UINT_8*) src.c_str(), src.size());
sha1.Final();
UINT_8 hash[20];
sha1.GetHash(hash);
stringstream cacheHash;
cacheHash.flags(ios::hex);
for (int i = 0; i < 20; i++)
cacheHash << setw(2) << setfill('0') << (int) hash[i];
return cacheHash.str();
}
string HipContext::getCacheFileName(const string& src) const {
stringstream cacheFile;
cacheFile << cacheDir << "openmm-hip-" << getHash(src + gpuArchitecture);
return cacheFile.str();
}
hipModule_t HipContext::createModule(const string source) { hipModule_t HipContext::createModule(const string source) {
return createModule(source, map<string, string>()); return createModule(source, map<string, string>());
} }
hipModule_t HipContext::createModule(const string source, const map<string, string>& defines) { hipModule_t HipContext::createModule(const string source, const map<string, string>& defines) {
const char* saveTempsEnv = getenv("OPENMM_SAVE_TEMPS"); const char* saveTempsEnv = getenv("OPENMM_SAVE_TEMPS");
bool saveTemps = saveTempsEnv != nullptr; const bool saveTemps = saveTempsEnv != nullptr && string(saveTempsEnv) == "1";
string bits = intToString(8*sizeof(void*));
string options = "-ffast-math -munsafe-fp-atomics -Wall"; int runtimeVersion;
CHECK_RESULT2(hipRuntimeGetVersion(&runtimeVersion), "Error getting HIP runtime version");
string options = "-O3 -ffast-math -munsafe-fp-atomics -Wall -Wno-hip-only";
options += " --offload-arch=" + gpuArchitecture;
if (gpuArchitecture.find("gfx90a") == 0 || if (gpuArchitecture.find("gfx90a") == 0 ||
gpuArchitecture.find("gfx94") == 0) { gpuArchitecture.find("gfx94") == 0) {
// HIP-TODO: Remove it when the compiler does a better job // HIP-TODO: Remove it when the compiler does a better job
...@@ -442,9 +472,15 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri ...@@ -442,9 +472,15 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
if (getMaxThreadBlockSize() < 1024) { if (getMaxThreadBlockSize() < 1024) {
options += " --gpu-max-threads-per-block=" + std::to_string(getMaxThreadBlockSize()); options += " --gpu-max-threads-per-block=" + std::to_string(getMaxThreadBlockSize());
} }
if (runtimeVersion < 60140092) {
// Workaround for operator* defined for complex types (typedefs for float2, double2) in
// ROCm 6.0 headers. This issue has been fixed in 6.1. hipRTC includes amd_hip_complex.h
// by default, we fool the include guard into thinking the header is already included.
options += " -D HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H";
}
stringstream src; stringstream src;
if (!options.empty())
src << "// Compilation Options: " << options << endl << endl; src << "// Compilation Options: " << options << endl << endl;
src << "// HIP Runtime Version: " << runtimeVersion << endl << endl;
for (auto& pair : compilationDefines) { for (auto& pair : compilationDefines) {
// Query defines to avoid duplicate variables // Query defines to avoid duplicate variables
if (defines.find(pair.first) == defines.end()) { if (defines.find(pair.first) == defines.end()) {
...@@ -457,11 +493,6 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri ...@@ -457,11 +493,6 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
if (!compilationDefines.empty()) if (!compilationDefines.empty())
src << endl; src << endl;
// include the main header for built-in variables (threadIdx etc.) and functions
src << "#include \"hip/hip_runtime.h\"\n";
// include the vector types
src << "#include \"hip/hip_vector_types.h\"\n";
if (useDoublePrecision) { if (useDoublePrecision) {
src << "typedef double real;\n"; src << "typedef double real;\n";
src << "typedef double2 real2;\n"; src << "typedef double2 real2;\n";
...@@ -501,113 +532,88 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri ...@@ -501,113 +532,88 @@ hipModule_t HipContext::createModule(const string source, const map<string, stri
// See whether we already have PTX for this kernel cached. // See whether we already have PTX for this kernel cached.
CSHA1 sha1; string cacheFile = getCacheFileName(src.str());
sha1.Update((const UINT_8*) src.str().c_str(), src.str().size());
sha1.Final();
UINT_8 hash[20];
sha1.GetHash(hash);
stringstream cacheHash;
cacheHash.flags(ios::hex);
for (int i = 0; i < 20; i++)
cacheHash << setw(2) << setfill('0') << (int) hash[i];
stringstream cacheFile;
cacheFile << cacheDir << cacheHash.str() << '_' << gpuArchitecture << '_' << bits;
hipModule_t module; hipModule_t module;
if (hipModuleLoad(&module, cacheFile.str().c_str()) == hipSuccess) if (hipModuleLoad(&module, cacheFile.c_str()) == hipSuccess) {
loadedModules.push_back(module);
return module; return module;
}
// Select names for the various temporary files. // Select names for the various temporary files.
stringstream tempFileName;
if (saveTemps) { if (saveTemps) {
tempFileName << saveTempsEnv; stringstream tempFileName;
const char* saveTempsPrefixEnv = getenv("OPENMM_SAVE_TEMPS_PREFIX"); const char* saveTempsPrefixEnv = getenv("OPENMM_SAVE_TEMPS_PREFIX");
if (saveTempsPrefixEnv) { if (saveTempsPrefixEnv) {
tempFileName << saveTempsPrefixEnv; tempFileName << saveTempsPrefixEnv;
} }
tempFileName << cacheHash.str(); tempFileName << getHash(src.str());
}
else { options += " --save-temps";
tempFileName << tempDir;
tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions. string inputFile = (tempFileName.str()+".hip");
tempFileName << "_" << getpid(); std::cout << "Source code: " << inputFile << std::endl;
std::cout << "Compile options: " << options << std::endl;
ofstream out(inputFile.c_str());
out << src.str();
out.close();
} }
string inputFile = (tempFileName.str()+".hip.cpp");
string outputFile = (tempFileName.str()+".hsaco");
string logFile = (tempFileName.str()+".log");
int res = 0;
// If the runtime compiler plugin is available, use it. // Split the command line options into an array of options.
if (hasCompilerKernel) { stringstream flagsStream(options);
string ptx = compilerKernel.getAs<HipCompilerKernel>().createModule(src.str(), options, *this); string flag;
vector<string> splitFlags;
while (flagsStream >> flag)
splitFlags.push_back(flag);
int numOptions = splitFlags.size();
vector<const char*> optionsVec(numOptions);
for (int i = 0; i < numOptions; i++)
optionsVec[i] = &splitFlags[i][0];
// If possible, write the PTX out to a temporary file so we can cache it for later use. // Compile the program to CO.
bool wroteCache = false; hiprtcProgram program;
HIPRTC_CHECK_RESULT(hiprtcCreateProgram(&program, src.str().c_str(), NULL, 0, NULL, NULL), "Error creating program");
try { try {
ofstream out(outputFile.c_str()); hiprtcResult result = hiprtcCompileProgram(program, optionsVec.size(), &optionsVec[0]);
out << ptx; if (result != HIPRTC_SUCCESS || saveTemps) {
out.close(); size_t logSize;
if (!out.fail()) hiprtcGetProgramLogSize(program, &logSize);
wroteCache = true; std::string log(logSize, '\0');
if (logSize > 0) {
hiprtcGetProgramLog(program, &log[0]);
if (saveTemps) {
std::cout << "Log: " << log << std::endl;
} }
catch (...) {
// Ignore.
} }
if (!wroteCache) { if (result != HIPRTC_SUCCESS) {
// An error occurred. Possibly we don't have permission to write to the temp directory. Just try to load the module directly. throw OpenMMException("Error compiling program: "+log);
CHECK_RESULT2(hipModuleLoadDataEx(&module, &ptx[0], 0, NULL, NULL), "Error loading HIP module");
return module;
} }
} }
else { size_t codeSize;
// Write out the source to a temporary file. hiprtcGetCodeSize(program, &codeSize);
vector<char> code(codeSize);
hiprtcGetCode(program, &code[0]);
hiprtcDestroyProgram(&program);
ofstream out(inputFile.c_str()); // If possible, write the CO out to a cache file for later use.
out << src.str();
out.close();
string command = compiler + " --genco --amdgpu-target=" + gpuArchitecture + " " + options + (saveTemps ? " -save-temps=obj" : "") +" -o \""+outputFile+"\" " + " \""+inputFile+"\" 2> \""+logFile+"\"";
res = std::system(command.c_str());
}
try {
if (res != 0) {
// Load the error log.
stringstream error; try {
error << "Error launching HIP compiler: " << res; ofstream out(cacheFile.c_str(), ios::out | ios::binary);
ifstream log(logFile.c_str()); out.write(&code[0], code.size());
if (log.is_open()) { out.close();
string line;
while (!log.eof()) {
getline(log, line);
error << '\n' << line;
}
log.close();
}
throw OpenMMException(error.str());
}
hipError_t result = hipModuleLoad(&module, outputFile.c_str());
if (result != hipSuccess) {
std::stringstream m;
m<<"Error loading HIP module: "<<getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
} }
if (!saveTemps) { catch (...) {
remove(inputFile.c_str()); // An error occurred. Possibly we don't have permission to write to the temp directory.
remove(logFile.c_str()); // Ignore.
} }
if (rename(outputFile.c_str(), cacheFile.str().c_str()) != 0 && !saveTemps) CHECK_RESULT2(hipModuleLoadDataEx(&module, &code[0], 0, NULL, NULL), "Error loading HIP module");
remove(outputFile.c_str()); loadedModules.push_back(module);
return module; return module;
} }
catch (...) { catch (...) {
if (!saveTemps) { hiprtcDestroyProgram(&program);
remove(inputFile.c_str());
remove(outputFile.c_str());
remove(logFile.c_str());
}
throw; throw;
} }
} }
...@@ -834,3 +840,11 @@ vector<int> HipContext::getDevicePrecedence() { ...@@ -834,3 +840,11 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence; return precedence;
} }
unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32
return hipHostMallocDefault;
#else
return hipHostMallocNumaUser;
#endif
}
...@@ -42,7 +42,7 @@ using namespace std; ...@@ -42,7 +42,7 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system), HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) { ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA"); CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), hipHostMallocMapped), "Error allocating pinned memory"); CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory"); CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
} }
......
...@@ -28,9 +28,6 @@ ...@@ -28,9 +28,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
#endif
#include "openmm/common/windowsExportCommon.h" #include "openmm/common/windowsExportCommon.h"
#include <string> #include <string>
......
...@@ -70,7 +70,7 @@ HipNonbondedUtilities::HipNonbondedUtilities(HipContext& context) : context(cont ...@@ -70,7 +70,7 @@ HipNonbondedUtilities::HipNonbondedUtilities(HipContext& context) : context(cont
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
CHECK_RESULT(hipEventCreateWithFlags(&downloadCountEvent, context.getEventFlags())); CHECK_RESULT(hipEventCreateWithFlags(&downloadCountEvent, context.getEventFlags()));
CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), hipHostMallocPortable)); CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), context.getHostMallocFlags()));
numForceThreadBlocks = 5*4*context.getMultiprocessors(); numForceThreadBlocks = 5*4*context.getMultiprocessors();
forceThreadBlockSize = 64; forceThreadBlockSize = 64;
findInteractingBlocksThreadBlockSize = context.getSIMDWidth(); findInteractingBlocksThreadBlockSize = context.getSIMDWidth();
......
...@@ -43,7 +43,6 @@ if (result != hipSuccess) { \ ...@@ -43,7 +43,6 @@ if (result != hipSuccess) { \
* Get the current clock time, measured in microseconds. * Get the current clock time, measured in microseconds.
*/ */
#ifdef _MSC_VER #ifdef _MSC_VER
#error "Windows unsupported for HIP platform"
#include <Windows.h> #include <Windows.h>
static long long getTime() { static long long getTime() {
FILETIME ft; FILETIME ft;
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
#include <sstream> #include <sstream>
#include <cstdio> #include <cstdio>
#ifdef _MSC_VER #ifdef _MSC_VER
#error "Windows unsupported for HIP platform" #include <Windows.h>
#endif #endif
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -112,9 +112,7 @@ HipPlatform::HipPlatform() { ...@@ -112,9 +112,7 @@ HipPlatform::HipPlatform() {
platformProperties.push_back(HipUseBlockingSync()); platformProperties.push_back(HipUseBlockingSync());
platformProperties.push_back(HipPrecision()); platformProperties.push_back(HipPrecision());
platformProperties.push_back(HipUseCpuPme()); platformProperties.push_back(HipUseCpuPme());
platformProperties.push_back(HipCompiler());
platformProperties.push_back(HipTempDirectory()); platformProperties.push_back(HipTempDirectory());
platformProperties.push_back(HipHostCompiler());
platformProperties.push_back(HipDisablePmeStream()); platformProperties.push_back(HipDisablePmeStream());
platformProperties.push_back(HipDeterministicForces()); platformProperties.push_back(HipDeterministicForces());
setPropertyDefaultValue(HipDeviceIndex(), ""); setPropertyDefaultValue(HipDeviceIndex(), "");
...@@ -124,26 +122,20 @@ HipPlatform::HipPlatform() { ...@@ -124,26 +122,20 @@ HipPlatform::HipPlatform() {
setPropertyDefaultValue(HipUseCpuPme(), "false"); setPropertyDefaultValue(HipUseCpuPme(), "false");
setPropertyDefaultValue(HipDisablePmeStream(), "false"); setPropertyDefaultValue(HipDisablePmeStream(), "false");
setPropertyDefaultValue(HipDeterministicForces(), "false"); setPropertyDefaultValue(HipDeterministicForces(), "false");
char* compiler = getenv("OPENMM_HIP_COMPILER"); #ifdef _MSC_VER
char* rocm_path = getenv("ROCM_PATH"); setPropertyDefaultValue(HipTempDirectory(), string(getenv("TEMP")));
string hipcc; #else
if (rocm_path != NULL) {
hipcc = string(rocm_path) + "/bin/hipcc";
} else if (compiler != NULL) {
hipcc = compiler;
} else {
hipcc = "/opt/rocm/bin/hipcc";
}
setPropertyDefaultValue(HipCompiler(), hipcc);
char* tmpdir = getenv("TMPDIR"); char* tmpdir = getenv("TMPDIR");
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir)); string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(HipTempDirectory(), tmp); setPropertyDefaultValue(HipTempDirectory(), tmp);
char* hostCompiler = getenv("HIP_HOST_COMPILER"); #endif
setPropertyDefaultValue(HipHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
} }
double HipPlatform::getSpeed() const { double HipPlatform::getSpeed() const {
return 100; // Reduce the speed of the HIP platform if there are no HIP devices in the system,
// so the OpenCL plaform can be selected as default
int numDevices;
return hipGetDeviceCount(&numDevices) != hipErrorNoDevice ? 100 : 40;
} }
bool HipPlatform::supportsDoublePrecision() const { bool HipPlatform::supportsDoublePrecision() const {
...@@ -174,12 +166,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string> ...@@ -174,12 +166,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string>
getPropertyDefaultValue(HipPrecision()) : properties.find(HipPrecision())->second); getPropertyDefaultValue(HipPrecision()) : properties.find(HipPrecision())->second);
string cpuPmePropValue = (properties.find(HipUseCpuPme()) == properties.end() ? string cpuPmePropValue = (properties.find(HipUseCpuPme()) == properties.end() ?
getPropertyDefaultValue(HipUseCpuPme()) : properties.find(HipUseCpuPme())->second); getPropertyDefaultValue(HipUseCpuPme()) : properties.find(HipUseCpuPme())->second);
const string& compilerPropValue = (properties.find(HipCompiler()) == properties.end() ?
getPropertyDefaultValue(HipCompiler()) : properties.find(HipCompiler())->second);
const string& tempPropValue = (properties.find(HipTempDirectory()) == properties.end() ? const string& tempPropValue = (properties.find(HipTempDirectory()) == properties.end() ?
getPropertyDefaultValue(HipTempDirectory()) : properties.find(HipTempDirectory())->second); getPropertyDefaultValue(HipTempDirectory()) : properties.find(HipTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(HipHostCompiler()) == properties.end() ?
getPropertyDefaultValue(HipHostCompiler()) : properties.find(HipHostCompiler())->second);
string pmeStreamPropValue = (properties.find(HipDisablePmeStream()) == properties.end() ? string pmeStreamPropValue = (properties.find(HipDisablePmeStream()) == properties.end() ?
getPropertyDefaultValue(HipDisablePmeStream()) : properties.find(HipDisablePmeStream())->second); getPropertyDefaultValue(HipDisablePmeStream()) : properties.find(HipDisablePmeStream())->second);
string deterministicForcesValue = (properties.find(HipDeterministicForces()) == properties.end() ? string deterministicForcesValue = (properties.find(HipDeterministicForces()) == properties.end() ?
...@@ -197,8 +185,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string> ...@@ -197,8 +185,8 @@ void HipPlatform::contextCreated(ContextImpl& context, const map<string, string>
char* threadsEnv = getenv("OPENMM_CPU_THREADS"); char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads; stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, NULL)); pmeStreamPropValue, deterministicForcesValue, threads, NULL));
} }
void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const { void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
...@@ -207,14 +195,12 @@ void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& origin ...@@ -207,14 +195,12 @@ void HipPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& origin
string blockingPropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseBlockingSync()); string blockingPropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseBlockingSync());
string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), HipPrecision()); string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), HipPrecision());
string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseCpuPme()); string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), HipUseCpuPme());
string compilerPropValue = platform.getPropertyValue(originalContext.getOwner(), HipCompiler());
string tempPropValue = platform.getPropertyValue(originalContext.getOwner(), HipTempDirectory()); string tempPropValue = platform.getPropertyValue(originalContext.getOwner(), HipTempDirectory());
string hostCompilerPropValue = platform.getPropertyValue(originalContext.getOwner(), HipHostCompiler());
string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), HipDisablePmeStream()); string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), HipDisablePmeStream());
string deterministicForcesValue = platform.getPropertyValue(originalContext.getOwner(), HipDeterministicForces()); string deterministicForcesValue = platform.getPropertyValue(originalContext.getOwner(), HipDeterministicForces());
int threads = reinterpret_cast<PlatformData*>(originalContext.getPlatformData())->threads.getNumThreads(); int threads = reinterpret_cast<PlatformData*>(originalContext.getPlatformData())->threads.getNumThreads();
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, &originalContext)); pmeStreamPropValue, deterministicForcesValue, threads, &originalContext));
} }
void HipPlatform::contextDestroyed(ContextImpl& context) const { void HipPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -223,9 +209,10 @@ void HipPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -223,9 +209,10 @@ void HipPlatform::contextDestroyed(ContextImpl& context) const {
} }
HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty, const string& cpuPmeProperty, const string& tempProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext) : const string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) { context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false),
threads(numThreads) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
size_t searchPos = 0, nextPos; size_t searchPos = 0, nextPos;
...@@ -242,11 +229,11 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst ...@@ -242,11 +229,11 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst
if (devices[i].length() > 0) { if (devices[i].length() > 0) {
int deviceIndex; int deviceIndex;
stringstream(devices[i]) >> deviceIndex; stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new HipContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i]))); contexts.push_back(new HipContext(system, deviceIndex, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
} }
} }
if (contexts.size() == 0) if (contexts.size() == 0)
contexts.push_back(new HipContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0]))); contexts.push_back(new HipContext(system, -1, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
} }
catch (...) { catch (...) {
// If an exception was thrown, do our best to clean up memory. // If an exception was thrown, do our best to clean up memory.
...@@ -275,9 +262,7 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst ...@@ -275,9 +262,7 @@ HipPlatform::PlatformData::PlatformData(ContextImpl* context, const System& syst
propertyValues[HipPlatform::HipUseBlockingSync()] = blocking ? "true" : "false"; propertyValues[HipPlatform::HipUseBlockingSync()] = blocking ? "true" : "false";
propertyValues[HipPlatform::HipPrecision()] = precisionProperty; propertyValues[HipPlatform::HipPrecision()] = precisionProperty;
propertyValues[HipPlatform::HipUseCpuPme()] = useCpuPme ? "true" : "false"; propertyValues[HipPlatform::HipUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[HipPlatform::HipCompiler()] = compilerProperty;
propertyValues[HipPlatform::HipTempDirectory()] = tempProperty; propertyValues[HipPlatform::HipTempDirectory()] = tempProperty;
propertyValues[HipPlatform::HipHostCompiler()] = hostCompilerProperty;
propertyValues[HipPlatform::HipDisablePmeStream()] = disablePmeStream ? "true" : "false"; propertyValues[HipPlatform::HipDisablePmeStream()] = disablePmeStream ? "true" : "false";
propertyValues[HipPlatform::HipDeterministicForces()] = deterministicForces ? "true" : "false"; propertyValues[HipPlatform::HipDeterministicForces()] = deterministicForces ? "true" : "false";
contextEnergy.resize(contexts.size()); contextEnergy.resize(contexts.size());
......
...@@ -77,7 +77,7 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res ...@@ -77,7 +77,7 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
* Sum the energy buffer. * Sum the energy buffer.
*/ */
__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) { __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
HIP_DYNAMIC_SHARED( mixed, tempBuffer) extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x; const unsigned int thread = threadIdx.x;
mixed sum = 0; mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x) for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
......
...@@ -56,8 +56,8 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { ...@@ -56,8 +56,8 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false", HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -56,8 +56,8 @@ void testGaussian() { ...@@ -56,8 +56,8 @@ void testGaussian() {
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false", HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -66,8 +66,8 @@ void verifySorting(vector<float> array, bool uniform) { ...@@ -66,8 +66,8 @@ void verifySorting(vector<float> array, bool uniform) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false", HipPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("HipPrecision"), "false",
platform.getPropertyDefaultValue(HipPlatform::HipCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()), platform.getPropertyDefaultValue(HipPlatform::HipTempDirectory()),
platform.getPropertyDefaultValue(HipPlatform::HipHostCompiler()), platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
HipArray data(context, array.size(), 4, "sortData"); HipArray data(context, array.size(), 4, "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment