avoid non deterministic arch order in compilation

37d77ca0 · Vijay Korthikanti · d80433e1 · 37d77ca0
Commit 37d77ca0 authored Oct 05, 2020 by Vijay Korthikanti
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 0 deletions

megatron/fused_kernels/__init__.py megatron/fused_kernels/__init__.py +8 -0

No files found.
--- a/megatron/fused_kernels/__init__.py
+++ b/megatron/fused_kernels/__init__.py
@@ -15,8 +15,16 @@
 import pathlib
 import subprocess
+import os
 from torch.utils import cpp_extension
+# Setting this param to a list has the problem of generating
+# different compilation commands (with a different order of architectures)
+# and leading to recompilation of fused kernels.
+# Set it to an empty string to avoid recompilation
+# and assign arch flags explicitly in extra_cuda_cflags below.
+os.environ["TORCH_CUDA_ARCH_LIST"] = ""
 def get_cuda_bare_metal_version(cuda_dir):
    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], 
                                         universal_newlines=True)