feat: add RISC-V support for CPU backend (v2) (#36578)

Signed-off-by: typer-J <2236066784@qq.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>

feat: add RISC-V support for CPU backend (v2) (#36578)
Signed-off-by: typer-J <2236066784@qq.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
41846537 · typer-J · GitHub · 4aaaf8c8 · 41846537 · 41846537
Unverified Commit 41846537 authored Mar 11, 2026 by typer-J Committed by GitHub Mar 10, 2026
5 changed files
--- a/cmake/cpu_extension.cmake
+++ b/cmake/cpu_extension.cmake
@@ -79,7 +79,8 @@ else()
    find_isa(${CPUINFO} "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa(${CPUINFO} "S390" S390_FOUND)
-    find_isa(${CPUINFO} "v" RVV_FOUND) # Check for RISC-V RVV support
+    find_isa(${CPUINFO} "zvfhmin" RVV_FP16_FOUND) # Check for RISC-V Vector FP16 support
+    find_isa(${CPUINFO} "zvfbfmin" RVV_BF16_FOUND) # Check for RISC-V Vector BF16 support

    # Support cross-compilation by allowing override via environment variables
    if (ENABLE_ARM_BF16)
@@ -142,11 +143,19 @@ elseif (S390_FOUND)
        "-march=native"
        "-mtune=native")
 elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
-    if(RVV_FOUND)
-	    message(FAIL_ERROR "Can't support rvv now.")
+    message(STATUS "RISC-V detected")
+    if(RVV_BF16_FOUND)
+        message(STATUS "BF16 extension detected")
+        set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
+        add_compile_definitions(RISCV_BF16_SUPPORT)
+    elseif (RVV_FP16_FOUND)
+        message(WARNING "BF16 functionality is not available")
+        set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
    else()
+        message(STATUS "compile riscv with scalar")
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
+    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
 else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
 endif()

--- a/csrc/cpu/cpu_types.hpp
+++ b/csrc/cpu/cpu_types.hpp
@@ -13,6 +13,9 @@
 #elif defined(__aarch64__)
  // arm implementation
  #include "cpu_types_arm.hpp"
+#elif defined(__riscv_v)
+  // riscv implementation
+  #include "cpu_types_riscv.hpp"
 #else
  #warning "unsupported vLLM cpu implementation, vLLM will compile with scalar"
  #include "cpu_types_scalar.hpp"

--- a/csrc/cpu/cpu_types_riscv.hpp
+++ b/csrc/cpu/cpu_types_riscv.hpp
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -7,13 +7,13 @@ numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative d

 # Dependencies for CPUs
 torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
-torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le"
+torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"

 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
-torchaudio; platform_machine != "s390x"
+torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"

 # required for the image processor of phi3v, this must be updated alongside torch
-torchvision; platform_machine != "s390x"
+torchvision; platform_machine != "s390x"  and platform_machine != "riscv64"

 # Intel Extension for PyTorch, only for x86_64 CPUs
 intel-openmp==2024.2.1; platform_machine == "x86_64"

--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -93,30 +93,7 @@ class CpuPlatform(Platform):
                return [torch.bfloat16, torch.float16, torch.float32]
            return [torch.float16, torch.float32]
        elif self.get_cpu_architecture() == CpuArchEnum.RISCV:
-            # Workaround for Issue #25655: RISC-V scheduler bug with float16
-            #
-            # Background:
-            # - RISC-V currently uses scalar code path
-            # - There is a latent bug in the vLLM scheduler that provides
-            # invalid
-            #   physical_block_idx values under certain conditions
-            # - This bug causes segmentation faults when using float16
-            # dtype on RISC-V
-            # - Testing shows that forcing float32 successfully bypasses
-            # this issue
-            #
-            # Technical details:
-            # - The bug manifests as out-of-bounds physical_block_idx in
-            # block_tables
-            # - Only occurs on RISC-V hardware
-            # tested on Sophgo SG2044
-            # - Does not reproduce on x86 or other architectures
-            # - Root cause is in Python-level scheduling logic,
-            # not C++ kernels
-            #
-            # This is a temporary workaround until the scheduler bug is fixed.
-            # See: https://github.com/vllm-project/vllm/issues/25655
-            return [torch.float32]
+            return [torch.bfloat16, torch.float16, torch.float32]
        # x86/aarch64 CPU has supported both bf16 and fp16 natively.
        return [torch.bfloat16, torch.float16, torch.float32]