Fix SM80 compilation

be8053d6 · Chenggang Zhao · 227c3589 · be8053d6 · be8053d6
Commit be8053d6 authored Jul 31, 2025 by Chenggang Zhao
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 2 deletions

csrc/kernels/utils.cuh csrc/kernels/utils.cuh +1 -1

setup.py setup.py +3 -1

No files found.
--- a/csrc/kernels/utils.cuh
+++ b/csrc/kernels/utils.cuh
@@ -158,7 +158,7 @@ __device__  __forceinline__ int64_t ld_volatile_global(const uint64_t *ptr) {
 #ifndef DISABLE_AGGRESSIVE_PTX_INSTRS
 #define LD_NC_FUNC "ld.global.nc.L1::no_allocate.L2::256B"
 #else
-#define LD_NC_FUNC "ld.volatile.global.L2::256B"
+#define LD_NC_FUNC "ld.volatile.global"
 #endif

 // `ld.global.nc.L1::no_allocate` will be translated into `LDG.E.NA.[width].CONSTANT` in SASS

--- a/setup.py
+++ b/setup.py
@@ -5,13 +5,15 @@ import importlib
 import importlib.resources
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension

-# Wheel specific: The wheels only include the soname of the host library (libnvshmem_host.so.X)
+
+# Wheel specific: the wheels only include the soname of the host library `libnvshmem_host.so.X`
 def get_nvshmem_host_lib_name():
    for path in importlib.resources.files('nvidia.nvshmem').iterdir():
        for file in path.rglob('libnvshmem_host.so.*'):
            return file.name
    raise ModuleNotFoundError('libnvshmem_host.so not found')

+
 if __name__ == '__main__':
    disable_nvshmem = False
    nvshmem_dir = os.getenv('NVSHMEM_DIR', None)