Unverified Commit 67723598 authored by Zhekai Zhang's avatar Zhekai Zhang Committed by GitHub
Browse files

Merge pull request #157 from mit-han-lab/dev

Fix missing third_party & merge main branch of dev repo
parents e1c5f3e4 75dad570
......@@ -129,7 +129,7 @@ if __name__ == "__main__":
for target in sm_targets:
NVCC_FLAGS += ["-gencode", f"arch=compute_{target},code=sm_{target}"]
NVCC_MSVC_FLAGS = ["-Xcompiler", "/Zc:__cplusplus", "-Xcompiler", "/FS"]
NVCC_MSVC_FLAGS = ["-Xcompiler", "/Zc:__cplusplus", "-Xcompiler", "/FS", "-Xcompiler", "/bigobj"]
nunchaku_extension = CUDAExtension(
name="nunchaku._C",
......
......@@ -215,6 +215,11 @@ struct LayerOffloadHelper {
if (offload) {
streamCompute = std::make_unique<CUDAStreamWrapper>();
streamLoad = std::make_unique<CUDAStreamWrapper>();
needWorkaround = checkWorkaround();
if (needWorkaround) {
spdlog::debug("Offloading helper: use WDDM workaround");
}
}
}
......@@ -240,6 +245,7 @@ private:
funcCompute(layer);
nextComputeDone = std::make_unique<CUDAEventWrapper>();
checkCUDA(cudaEventRecord(nextComputeDone->event, getCurrentCUDAStream()));
workaroundFlush();
}
{
......@@ -253,10 +259,13 @@ private:
}
nextLoadDone = std::make_unique<CUDAEventWrapper>();
checkCUDA(cudaEventRecord(nextLoadDone->event, getCurrentCUDAStream()));
workaroundFlush();
}
eventComputeDone = std::move(nextComputeDone);
eventLoadDone = std::move(nextLoadDone);
workaroundSynchronize();
}
}
......@@ -266,4 +275,35 @@ private:
}
checkCUDA(cudaStreamWaitEvent(getCurrentCUDAStream(), event->event));
}
// WDDM prevents multiple streams run concurrently
// use flush and synchronize to work around
bool needWorkaround;
static bool checkWorkaround() {
if (char *env = getenv("NUNCHAKU_OFFLOAD_WDDM_WORKAROUND")) {
if (std::string(env) == "1") {
return true;
} else if (std::string(env) == "0") {
return false;
}
}
#ifdef _WIN32
return true;
#else
return false;
#endif
}
void workaroundFlush() {
if (!needWorkaround) {
return;
}
cudaStreamQuery(getCurrentCUDAStream());
}
void workaroundSynchronize() {
if (!needWorkaround) {
return;
}
checkCUDA(cudaEventSynchronize(eventComputeDone->event));
}
};
\ No newline at end of file
Subproject commit 0d23f715690c5171fd93679de8afd149376db167
Subproject commit a75b4ac483166189a45290783cb0a18af5ff0ea5
Subproject commit 63258397761b3dd96dd171e5a5ad5aa915834c35
Subproject commit 8b6b7d878c89e81614d05edca7936de41ccdd2da
Subproject commit 27cb4c76708608465c413f6d0e6b8d99a4d84302
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment