Unverified commit 100c7b65, authored by lyd1992, committed by GitHub
Browse files

[Platform] Fix RISC-V platform detection (lscpu parsing + non-NUMA meminfo) (#40427)


Signed-off-by: liuyudong <liuyudong@iscas.ac.cn>
parent 56bdf85e
...@@ -927,7 +927,9 @@ def get_vllm_version() -> str: ...@@ -927,7 +927,9 @@ def get_vllm_version() -> str:
elif _is_tpu(): elif _is_tpu():
version += f"{sep}tpu" version += f"{sep}tpu"
elif _is_cpu(): elif _is_cpu():
if envs.VLLM_TARGET_DEVICE == "cpu": # Check the local VLLM_TARGET_DEVICE (may be set by auto-detect above),
# not envs.VLLM_TARGET_DEVICE, so CPU-only hosts still get `+cpu`.
if VLLM_TARGET_DEVICE == "cpu":
version += f"{sep}cpu" version += f"{sep}cpu"
elif _is_xpu(): elif _is_xpu():
version += f"{sep}xpu" version += f"{sep}xpu"
......
...@@ -177,7 +177,6 @@ def cpu_platform_plugin() -> str | None: ...@@ -177,7 +177,6 @@ def cpu_platform_plugin() -> str | None:
logger.debug( logger.debug(
"Confirmed CPU platform is available because the machine is MacOS." "Confirmed CPU platform is available because the machine is MacOS."
) )
except Exception as e: except Exception as e:
logger.debug("CPU platform is not available because: %s", str(e)) logger.debug("CPU platform is not available because: %s", str(e))
......
...@@ -87,7 +87,13 @@ def get_memory_node_info(node_id: int = 0) -> MemoryNodeInfo: ...@@ -87,7 +87,13 @@ def get_memory_node_info(node_id: int = 0) -> MemoryNodeInfo:
meminfo_path = f"/sys/devices/system/node/node{node_id}/meminfo" meminfo_path = f"/sys/devices/system/node/node{node_id}/meminfo"
if not os.path.exists(meminfo_path): if not os.path.exists(meminfo_path):
raise RuntimeError(f"{meminfo_path} doesn't exit.") # Non-NUMA systems (e.g. many RISC-V boards) don't expose per-node
# meminfo. Fall back to system-wide numbers from psutil.
vm = psutil.virtual_memory()
return MemoryNodeInfo(
total_memory=vm.total,
available_memory=vm.available,
)
meminfo = {} meminfo = {}
with open(meminfo_path) as f: with open(meminfo_path) as f:
...@@ -147,19 +153,36 @@ def get_visible_memory_node() -> list[int]: ...@@ -147,19 +153,36 @@ def get_visible_memory_node() -> list[int]:
@cache @cache
def _synthesize_cpu_list() -> list[LogicalCPUInfo]:
    """Build a flat fallback CPU topology.

    Every logical CPU reported by the OS is treated as its own physical
    core and placed on NUMA node 0. Intended for cases where lscpu output
    is unavailable or cannot be parsed (e.g. macOS, RISC-V).

    Returns:
        One ``LogicalCPUInfo`` per logical CPU, in ascending index order.
    """
    num_logical_cpus = os.cpu_count()
    # os.cpu_count() may return None when the count is undeterminable.
    assert num_logical_cpus
    synthesized: list[LogicalCPUInfo] = []
    for cpu_index in range(num_logical_cpus):
        # Same index for the CPU and its core; NUMA node fixed at 0.
        synthesized.append(LogicalCPUInfo(cpu_index, cpu_index, 0))
    return synthesized
def _get_cpu_list() -> list[LogicalCPUInfo]: def _get_cpu_list() -> list[LogicalCPUInfo]:
if platform.system() == "Darwin": if platform.system() == "Darwin":
# For MacOS, no user-level CPU affinity and SMT, return all CPUs # For MacOS, no user-level CPU affinity and SMT, return all CPUs
cpu_count = os.cpu_count() return _synthesize_cpu_list()
assert cpu_count
return [LogicalCPUInfo(i, i, 0) for i in range(cpu_count)]
lscpu_output = subprocess.check_output( lscpu_output = subprocess.check_output(
"lscpu --json --extended=CPU,CORE,NODE --online", shell=True, text=True "lscpu --json --extended=CPU,CORE,NODE --online", shell=True, text=True
) )
# For platform without NUMA, replace '-' to '0' # For platforms without NUMA, map bare `-` node to 0 so non-NUMA
lscpu_output = re.sub(r'"node":\s*-\s*(,|\n)', r'"node": 0\1', lscpu_output) # systems keep the existing behavior from #39781.
lscpu_output = re.sub(r'"node":\s*-\s*(,|\n|\})', r'"node": 0\1', lscpu_output)
# On some architectures (notably RISC-V), lscpu also emits bare `-`
# for cpu/core. Quote them so the JSON parses; they will decode to
# -1 and be filtered out below, triggering the synthesized fallback.
lscpu_output = re.sub(
r'("(?:cpu|core)":\s*)-\s*(,|\n|\})',
r'\1"-"\2',
lscpu_output,
)
logical_cpu_list: list[LogicalCPUInfo] = json.loads( logical_cpu_list: list[LogicalCPUInfo] = json.loads(
lscpu_output, object_hook=LogicalCPUInfo.json_decoder lscpu_output, object_hook=LogicalCPUInfo.json_decoder
...@@ -170,4 +193,9 @@ def _get_cpu_list() -> list[LogicalCPUInfo]: ...@@ -170,4 +193,9 @@ def _get_cpu_list() -> list[LogicalCPUInfo]:
x for x in logical_cpu_list if -1 not in (x.id, x.physical_core, x.numa_node) x for x in logical_cpu_list if -1 not in (x.id, x.physical_core, x.numa_node)
] ]
# If lscpu returned no valid entries (e.g. RISC-V where all fields
# are bare `-`), fall back to synthesized topology.
if not logical_cpu_list:
logical_cpu_list = _synthesize_cpu_list()
return logical_cpu_list return logical_cpu_list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment