Unverified Commit 41846537 authored by typer-J's avatar typer-J Committed by GitHub
Browse files

feat: add RISC-V support for CPU backend (v2) (#36578)


Signed-off-by: default avatartyper-J <2236066784@qq.com>
Co-authored-by: default avatarLi, Jiang <jiang1.li@intel.com>
parent 4aaaf8c8
......@@ -79,7 +79,8 @@ else()
find_isa(${CPUINFO} "asimd" ASIMD_FOUND) # Check for ARM NEON support
find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
find_isa(${CPUINFO} "S390" S390_FOUND)
find_isa(${CPUINFO} "v" RVV_FOUND) # Check for RISC-V RVV support
find_isa(${CPUINFO} "zvfhmin" RVV_FP16_FOUND) # Check for RISC-V Vector FP16 support
find_isa(${CPUINFO} "zvfbfmin" RVV_BF16_FOUND) # Check for RISC-V Vector BF16 support
# Support cross-compilation by allowing override via environment variables
if (ENABLE_ARM_BF16)
......@@ -142,11 +143,19 @@ elseif (S390_FOUND)
"-march=native"
"-mtune=native")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
if(RVV_FOUND)
message(FAIL_ERROR "Can't support rvv now.")
message(STATUS "RISC-V detected")
if(RVV_BF16_FOUND)
message(STATUS "BF16 extension detected")
set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
add_compile_definitions(RISCV_BF16_SUPPORT)
elseif (RVV_FP16_FOUND)
message(WARNING "BF16 functionality is not available")
set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
else()
message(STATUS "compile riscv with scalar")
list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
endif()
list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
else()
message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()
......
......@@ -13,6 +13,9 @@
#elif defined(__aarch64__)
// arm implementation
#include "cpu_types_arm.hpp"
#elif defined(__riscv_v)
// riscv implementation
#include "cpu_types_riscv.hpp"
#else
#warning "unsupported vLLM cpu implementation, vLLM will compile with scalar"
#include "cpu_types_scalar.hpp"
......
This diff is collapsed.
......@@ -7,13 +7,13 @@ numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative d
# Dependencies for CPUs
torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le"
torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "s390x"
torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"
# required for the image processor of phi3v, this must be updated alongside torch
torchvision; platform_machine != "s390x"
torchvision; platform_machine != "s390x" and platform_machine != "riscv64"
# Intel Extension for PyTorch, only for x86_64 CPUs
intel-openmp==2024.2.1; platform_machine == "x86_64"
......
......@@ -93,30 +93,7 @@ class CpuPlatform(Platform):
return [torch.bfloat16, torch.float16, torch.float32]
return [torch.float16, torch.float32]
elif self.get_cpu_architecture() == CpuArchEnum.RISCV:
# Workaround for Issue #25655: RISC-V scheduler bug with float16
#
# Background:
# - RISC-V currently uses scalar code path
# - There is a latent bug in the vLLM scheduler that provides
# invalid
# physical_block_idx values under certain conditions
# - This bug causes segmentation faults when using float16
# dtype on RISC-V
# - Testing shows that forcing float32 successfully bypasses
# this issue
#
# Technical details:
# - The bug manifests as out-of-bounds physical_block_idx in
# block_tables
# - Only occurs on RISC-V hardware
# tested on Sophgo SG2044
# - Does not reproduce on x86 or other architectures
# - Root cause is in Python-level scheduling logic,
# not C++ kernels
#
# This is a temporary workaround until the scheduler bug is fixed.
# See: https://github.com/vllm-project/vllm/issues/25655
return [torch.float32]
return [torch.bfloat16, torch.float16, torch.float32]
# x86/aarch64 CPU has supported both bf16 and fp16 natively.
return [torch.bfloat16, torch.float16, torch.float32]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment