# /root/anaconda3/envs/pynx/lib/python3.10/site-packages/pycuda-2024.1.2-py3.10-linux-x86_64.egg/pycuda/compiler.py from pytools import memoize # don't import pycuda.driver here--you'll create an import loop import os import sys from tempfile import mkstemp from os import unlink from pytools.prefork import call_capture_output @memoize def get_nvcc_version(nvcc): cmdline = [nvcc, "--version"] result, stdout, stderr = call_capture_output(cmdline) if result != 0 or not stdout: from warnings import warn warn("NVCC version could not be determined.") stdout = b"nvcc unknown version" return stdout.decode("utf-8", "replace") def _new_md5(): try: import hashlib return hashlib.md5() except ImportError: # for Python << 2.5 import md5 return md5.new() def preprocess_source(source, options, nvcc): handle, source_path = mkstemp(suffix=".cu") outf = open(source_path, "w") outf.write(source) outf.close() os.close(handle) cmdline = [nvcc, "--preprocess"] + options + [source_path] if "win32" in sys.platform: cmdline.extend(["--compiler-options", "-EP"]) else: cmdline.extend(["--compiler-options", "-P"]) result, stdout, stderr = call_capture_output(cmdline, error_on_nonzero=False) if result != 0: from pycuda.driver import CompileError raise CompileError( "nvcc preprocessing of %s failed" % source_path, cmdline, stderr=stderr ) # sanity check if len(stdout) < 0.5 * len(source): from pycuda.driver import CompileError raise CompileError( "nvcc preprocessing of %s failed with ridiculously " "small code output - likely unsupported compiler." % source_path, cmdline, stderr=stderr.decode("utf-8", "replace"), ) unlink(source_path) preprocessed_str = stdout.decode("utf-8", "replace") # remove the temporary filename from the preprocessed source code to get reproducible hashes return preprocessed_str.replace(os.path.basename(source_path), "") def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"): from os.path import join assert target in ["cubin", "ptx", "fatbin"] if cache_dir: checksum = _new_md5() if "#include" in source: checksum.update(preprocess_source(source, options, nvcc).encode("utf-8")) else: checksum.update(source.encode("utf-8")) for option in options: checksum.update(option.encode("utf-8")) checksum.update(get_nvcc_version(nvcc).encode("utf-8")) from pycuda.characterize import platform_bits checksum.update(str(platform_bits()).encode("utf-8")) cache_file = checksum.hexdigest() cache_path = join(cache_dir, cache_file + "." + target) try: cache_file = open(cache_path, "rb") try: return cache_file.read() finally: cache_file.close() except Exception: pass from tempfile import mkdtemp file_dir = mkdtemp() file_root = "kernel" cu_file_name = file_root + ".cu" cu_file_path = join(file_dir, cu_file_name) outf = open(cu_file_path, "w") outf.write(str(source)) outf.close() if keep: options = options[:] options.append("--keep") print("*** compiler output in %s" % file_dir) cmdline = [nvcc, "--" + target] + options + [cu_file_name] result, stdout, stderr = call_capture_output( cmdline, cwd=file_dir, error_on_nonzero=False ) try: # GPUfusion: kernel.cubin <=> kernel.cu-hip-amdgcn-amd-amdhsa.hipfb if target == "cubin": result_f = open(join(file_dir, cu_file_name + "-hip-amdgcn-amd-amdhsa.hipfb"), "rb") print("compile file") print(join(file_dir, cu_file_name + "-hip-amdgcn-amd-amdhsa.hipfb")) # GPUfusion: kernel.ptx <=> kernel-hip-amdgcn-amd-amdhsa-gfx906.bc elif target == "ptx": result_f = open(join(file_dir, file_root + "-hip-amdgcn-amd-amdhsa-gfx906.bc"), "rb") print("compile file") print(join(file_dir, file_root + "-hip-amdgcn-amd-amdhsa-gfx906.bc")) else: result_f = open(join(file_dir, file_root + "." + target), "rb") print("compile file") print(join(file_dir, file_root + "." + target)) except OSError: no_output = True else: no_output = False if result != 0 or (no_output and (stdout or stderr)): if result == 0: from warnings import warn warn( "PyCUDA: nvcc exited with status 0, but appears to have " "encountered an error" ) from pycuda.driver import CompileError raise CompileError( "nvcc compilation of %s failed" % cu_file_path, cmdline, stdout=stdout.decode("utf-8", "replace"), stderr=stderr.decode("utf-8", "replace"), ) if stdout or stderr: lcase_err_text = (stdout + stderr).decode("utf-8", "replace").lower() from warnings import warn if "demoted" in lcase_err_text or "demoting" in lcase_err_text: warn( "nvcc said it demoted types in source code it " "compiled--this is likely not what you want.", stacklevel=4, ) warn( "The CUDA compiler succeeded, but said the following:\n" + (stdout + stderr).decode("utf-8", "replace"), stacklevel=4, ) result_data = result_f.read() result_f.close() if cache_dir: outf = open(cache_path, "wb") outf.write(result_data) outf.close() if not keep: from os import listdir, unlink, rmdir for name in listdir(file_dir): unlink(join(file_dir, name)) rmdir(file_dir) return result_data def _get_per_user_string(): try: from os import getuid except ImportError: checksum = _new_md5() from os import environ checksum.update(environ["USERNAME"].encode("utf-8")) return checksum.hexdigest() else: return "uid%d" % getuid() def _find_pycuda_include_path(): import importlib.util import os return os.path.abspath( os.path.join(importlib.util.find_spec("pycuda").origin, os.path.pardir, "cuda")) DEFAULT_NVCC_FLAGS = [ _flag.strip() for _flag in os.environ.get("PYCUDA_DEFAULT_NVCC_FLAGS", "").split() if _flag.strip() ] def compile( source, nvcc="nvcc", options=None, keep=False, no_extern_c=False, arch=None, code=None, cache_dir=None, include_dirs=[], target="cubin", ): assert target in ["cubin", "ptx", "fatbin"] if not no_extern_c: source = 'extern "C" {\n%s\n}\n' % source if options is None: options = DEFAULT_NVCC_FLAGS options = options[:] if arch is None: from pycuda.driver import Error try: from pycuda.driver import Context arch = "sm_%d%d" % Context.get_device().compute_capability() except Error: pass from pycuda.driver import CUDA_DEBUGGING if CUDA_DEBUGGING: cache_dir = False keep = True options.extend(["-g", "-G"]) if "PYCUDA_CACHE_DIR" in os.environ and cache_dir is None: cache_dir = os.environ["PYCUDA_CACHE_DIR"] if "PYCUDA_DISABLE_CACHE" in os.environ: cache_dir = False if cache_dir is None: import platformdirs cache_dir = os.path.join( platformdirs.user_cache_dir("pycuda", "pycuda"), "compiler-cache-v1" ) from os import makedirs makedirs(cache_dir, exist_ok=True) if arch is not None: options.extend(["-arch", arch]) if code is not None: options.extend(["-code", code]) if "darwin" in sys.platform and sys.maxsize == 9223372036854775807: options.append("-m64") elif "win32" in sys.platform and sys.maxsize == 9223372036854775807: options.append("-m64") elif "win32" in sys.platform and sys.maxsize == 2147483647: options.append("-m32") include_dirs = include_dirs + [_find_pycuda_include_path()] for i in include_dirs: options.append("-I" + i) return compile_plain(source, options, keep, nvcc, cache_dir, target) class CudaModule: def _check_arch(self, arch): if arch is None: return try: from pycuda.driver import Context capability = Context.get_device().compute_capability() if tuple(map(int, tuple(arch.split("_")[1]))) > capability: from warnings import warn warn( "trying to compile for a compute capability " "higher than selected GPU" ) except Exception: pass def _bind_module(self): self.get_global = self.module.get_global self.get_texref = self.module.get_texref if hasattr(self.module, "get_surfref"): self.get_surfref = self.module.get_surfref def get_function(self, name): return self.module.get_function(name) class SourceModule(CudaModule): """ Creates a Module from a single .cu source object linked against the static CUDA runtime. """ def __init__( self, source, nvcc="nvcc", options=None, keep=False, no_extern_c=False, arch=None, code=None, cache_dir=None, include_dirs=[], ): self._check_arch(arch) cubin = compile( source, nvcc, options, keep, no_extern_c, arch, code, cache_dir, include_dirs, ) from pycuda.driver import module_from_buffer self.module = module_from_buffer(cubin) self._bind_module() def _search_on_path(filenames): """Find file on system path.""" # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224 from os.path import exists, abspath, join from os import pathsep, environ search_path = environ["PATH"] paths = search_path.split(pathsep) for path in paths: for filename in filenames: if exists(join(path, filename)): return abspath(join(path, filename)) @memoize def _find_nvcc_on_path(): return _search_on_path(["nvcc", "nvcc.exe"]) class DynamicModule(CudaModule): """ Creates a Module from multiple .cu source, library file and/or data objects linked against the static or dynamic CUDA runtime. """ def __init__( self, nvcc="nvcc", link_options=None, keep=False, no_extern_c=False, arch=None, code=None, cache_dir=None, include_dirs=[], message_handler=None, log_verbose=False, cuda_libdir=None, ): from pycuda.driver import Context compute_capability = Context.get_device().compute_capability() if compute_capability < (3, 5): raise Exception( "Minimum compute capability for dynamic parallelism is 3.5 (found: %u.%u)!" % (compute_capability[0], compute_capability[1]) ) else: from pycuda.driver import Linker self.linker = Linker(message_handler, link_options, log_verbose) self._check_arch(arch) self.nvcc = nvcc self.keep = keep self.no_extern_c = no_extern_c self.arch = arch self.code = code self.cache_dir = cache_dir self.include_dirs = include_dirs self.cuda_libdir = cuda_libdir self.libdir, self.libptn = None, None self.module = None def _locate_cuda_libdir(self): """ Locate the "standard" CUDA SDK library directory in the local file system. Supports 64-Bit Windows, Linux and Mac OS X. In case the caller supplied cuda_libdir in the constructor other than None that value is returned unchecked, else a best-effort attempt is made. Precedence: Windows: cuda_libdir > %CUDA_PATH% Linux: cuda_libdir > $CUDA_ROOT > $LD_LIBRARY_PATH > '/usr/lib/x86_64-linux-gnu' Returns a pair (libdir, libptn) where libdir is None in case of failure or a string containing the absolute path of the directory, and libptn is the %-format pattern to construct library file names from library names on the local system. Raises a RuntimeError in case of failure. Links: - Post-installation Actions http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions TODO: - Is $CUDA_ROOT/lib64 the correct path to assume for 64-Bit CUDA libraries on Linux? - Mac OS X (Darwin) is currently treated like Linux, is that correct? - Check CMake's FindCUDA module, it might contain some helpful clues in its sources https://cmake.org/cmake/help/v3.0/module/FindCUDA.html https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake - Verify all Linux code paths somehow """ from os.path import isfile, join from platform import system as platform_system system = platform_system() libdir, libptn = None, None if system == "Windows": if self.cuda_libdir is not None: libdir = self.cuda_libdir elif "CUDA_PATH" in os.environ and isfile( join(os.environ["CUDA_PATH"], "lib\\x64\\cudadevrt.lib") ): libdir = join(os.environ["CUDA_PATH"], "lib\\x64") libptn = "%s.lib" elif system in ["Linux", "Darwin"]: if self.cuda_libdir is not None: libdir = self.cuda_libdir elif "CUDA_ROOT" in os.environ and isfile( join(os.environ["CUDA_ROOT"], "lib64/libcudadevrt.a") ): libdir = join(os.environ["CUDA_ROOT"], "lib64") elif "LD_LIBRARY_PATH" in os.environ: for ld_path in os.environ["LD_LIBRARY_PATH"].split(":"): if isfile(join(ld_path, "libcudadevrt.a")): libdir = ld_path break if libdir is None and isfile("/usr/lib/x86_64-linux-gnu/libcudadevrt.a"): libdir = "/usr/lib/x86_64-linux-gnu" if libdir is None: nvcc_path = _find_nvcc_on_path() if nvcc_path is not None: libdir = join(os.path.dirname(nvcc_path), "..", "lib64") libptn = "lib%s.a" if libdir is None: raise RuntimeError( "Unable to locate the CUDA SDK installation " "directory, set CUDA library path manually" ) return libdir, libptn def add_source(self, source, nvcc_options=None, name="kernel.ptx"): ptx = compile( source, nvcc=self.nvcc, options=nvcc_options, keep=self.keep, no_extern_c=self.no_extern_c, arch=self.arch, code=self.code, cache_dir=self.cache_dir, include_dirs=self.include_dirs, target="ptx", ) from pycuda.driver import jit_input_type self.linker.add_data(ptx, jit_input_type.PTX, name) return self def add_data(self, data, input_type, name="unknown"): self.linker.add_data(data, input_type, name) return self def add_file(self, filename, input_type): self.linker.add_file(filename, input_type) return self def add_stdlib(self, libname): if self.libdir is None: self.libdir, self.libptn = self._locate_cuda_libdir() from os.path import isfile, join libpath = join(self.libdir, self.libptn % libname) if not isfile(libpath): raise OSError('CUDA SDK library file "%s" not found' % libpath) from pycuda.driver import jit_input_type self.linker.add_file(libpath, jit_input_type.LIBRARY) return self def link(self): self.module = self.linker.link_module() self.linker = None self._bind_module() return self class DynamicSourceModule(DynamicModule): """ Creates a Module from a single .cu source object linked against the dynamic CUDA runtime. - compiler generates PTX relocatable device code (rdc) from source that can be linked with other relocatable device code - source is linked against the CUDA device runtime library cudadevrt - library cudadevrt is statically linked into the generated Module """ def __init__( self, source, nvcc="nvcc", options=None, keep=False, no_extern_c=False, arch=None, code=None, cache_dir=None, include_dirs=[], cuda_libdir=None, ): super().__init__( nvcc=nvcc, link_options=None, keep=keep, no_extern_c=no_extern_c, arch=arch, code=code, cache_dir=cache_dir, include_dirs=include_dirs, cuda_libdir=cuda_libdir, ) if options is None: options = DEFAULT_NVCC_FLAGS options = options[:] if "-rdc=true" not in options: options.append("-rdc=true") if "-lcudadevrt" not in options: options.append("-lcudadevrt") self.add_source(source, nvcc_options=options) # self.add_stdlib("cudadevrt") self.link()