[Misc] AMD Build Improvements (#12923)

f1042e86 · Shiyan Deng · GitHub · 7c4033ac · f1042e86 · f1042e86
Unverified commit f1042e86, authored Feb 12, 2025 by Shiyan Deng; committed by GitHub on Feb 12, 2025.
4 changed files
--- a/csrc/moe/moe_align_sum_kernels.cu
+++ b/csrc/moe/moe_align_sum_kernels.cu
@@ -3,7 +3,7 @@
 #include <c10/cuda/CUDAGuard.h>
 #include <ATen/ATen.h>
-#include <THC/THCAtomics.cuh>
+#include <ATen/cuda/Atomic.cuh>
 #include "../cuda_compat.h"
 #include "../dispatch_utils.h"

--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -1122,4 +1122,4 @@ void paged_attention(
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
 #undef DIVIDE_ROUND_UP
\ No newline at end of file
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -205,6 +205,14 @@ _VLLM_MODELS = {
    **_FALLBACK_MODEL,
 }
+# This variable is used as the args for subprocess.run(). We
+# can modify this variable to alter the args if needed, e.g.
+# when we use par format to pack things together, sys.executable
+# might not be the target we want to run.
+_SUBPROCESS_COMMAND = [
+    sys.executable, "-m", "vllm.model_executor.models.registry"
+]
 @dataclass(frozen=True)
 class _ModelInfo:
@@ -502,10 +510,9 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
        # cannot use `sys.executable __file__` here because the script
        # contains relative imports
-        returned = subprocess.run(
+        returned = subprocess.run(_SUBPROCESS_COMMAND,
-            [sys.executable, "-m", "vllm.model_executor.models.registry"],
+                                  input=input_bytes,
-            input=input_bytes,
+                                  capture_output=True)
-            capture_output=True)
        # check if the subprocess is successful
        try:

--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -45,4 +45,4 @@ __all__ = [
    "SolarConfig",
    "Telechat2Config",
    "UltravoxConfig",
 ]
\ No newline at end of file