"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "8f4b313c3790844d2d6ec9aeaa6dd0825c94752e"
Unverified Commit ec7aafc0 authored by velonica0's avatar velonica0 Committed by GitHub
Browse files

[CPU][RISC-V] Support multiple RVV VLEN targets via compile-time dispatch (#39478)


Signed-off-by: velonica0 <like@mail.nankai.edu.cn>
parent 6097afb9
...@@ -161,16 +161,49 @@ elseif (S390_FOUND) ...@@ -161,16 +161,49 @@ elseif (S390_FOUND)
"-mtune=native") "-mtune=native")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
message(STATUS "RISC-V detected")

# VLLM_RVV_VLEN selects the RVV vector length (in bits) to compile for:
#   128 or 256 -> build the RVV kernels for that VLEN
#   0          -> force the scalar (rv64gc) build
# When the variable is not defined it is auto-detected from /proc/cpuinfo.
# Override with -DVLLM_RVV_VLEN=128 or -DVLLM_RVV_VLEN=256.
#
# NOTE: /proc/cpuinfo is read at configure time on the machine running CMake,
# so when building for a different machine pass VLLM_RVV_VLEN explicitly.
if(NOT DEFINED VLLM_RVV_VLEN)
    # Auto-detect: keep the largest zvl<N>b found in /proc/cpuinfo. The
    # candidates are listed in ascending order, so the last match wins.
    if(EXISTS "/proc/cpuinfo")
        file(READ "/proc/cpuinfo" _vllm_rvv_cpuinfo)
        set(_vllm_rvv_detected 0)
        foreach(_vllm_rvv_candidate IN ITEMS 128 256 512 1024)
            if(_vllm_rvv_cpuinfo MATCHES "zvl${_vllm_rvv_candidate}b")
                set(_vllm_rvv_detected ${_vllm_rvv_candidate})
            endif()
        endforeach()
        if(_vllm_rvv_detected GREATER 0)
            set(VLLM_RVV_VLEN ${_vllm_rvv_detected})
        endif()
        # Do not leak the scratch variables into the rest of the file.
        unset(_vllm_rvv_cpuinfo)
        unset(_vllm_rvv_detected)
        unset(_vllm_rvv_candidate)
    endif()
    # If auto-detect failed (no /proc/cpuinfo or no zvl<N>b reported)
    # but the compiler supports RVV, require explicit specification.
    if(NOT DEFINED VLLM_RVV_VLEN AND (RVV_FP16_FOUND OR RVV_BF16_FOUND))
        message(FATAL_ERROR
            "RISC-V RVV is available but VLEN could not be auto-detected. "
            "Please specify VLEN explicitly:\n"
            " -DVLLM_RVV_VLEN=128 (for VLEN=128 hardware)\n"
            " -DVLLM_RVV_VLEN=256 (for VLEN=256 hardware, e.g. Spacemit X100)\n"
            " -DVLLM_RVV_VLEN=0 (force scalar, no RVV)")
    endif()
endif()

# cpu_types_riscv_defs.hpp only maps VLEN 128 and 256 and hits
# '#error "... unsupported __riscv_v_min_vlen"' for anything else. Reject an
# unsupported value (auto-detected 512/1024, or a bad -DVLLM_RVV_VLEN=...) here,
# once and with guidance, instead of failing later in every translation unit.
# The check only applies when the RVV flags would actually be used; without
# compiler RVV support the build is scalar regardless of VLLM_RVV_VLEN.
if(DEFINED VLLM_RVV_VLEN
   AND (RVV_FP16_FOUND OR RVV_BF16_FOUND)
   AND NOT "${VLLM_RVV_VLEN}" MATCHES "^(0|128|256)$")
    message(FATAL_ERROR
        "Unsupported VLLM_RVV_VLEN='${VLLM_RVV_VLEN}': the RVV kernels "
        "support only VLEN=128 and VLEN=256. Please pass one of:\n"
        " -DVLLM_RVV_VLEN=128 (for VLEN=128 hardware)\n"
        " -DVLLM_RVV_VLEN=256 (for VLEN=256 hardware)\n"
        " -DVLLM_RVV_VLEN=0 (force scalar, no RVV)")
endif()

if(VLLM_RVV_VLEN AND VLLM_RVV_VLEN GREATER 0)
    message(STATUS "RISC-V RVV VLEN=${VLLM_RVV_VLEN}")
    if(RVV_BF16_FOUND)
        # FP16 + BF16 vector extensions; -mrvv-vector-bits=zvl makes the
        # zvl<N>b value in -march the fixed vector width.
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
        add_compile_definitions(RISCV_BF16_SUPPORT)
    elseif(RVV_FP16_FOUND)
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
    else()
        # A VLEN is known but neither RVV_FP16_FOUND nor RVV_BF16_FOUND is set.
        message(STATUS "compile riscv with scalar (no FP16/BF16)")
        set(MARCH_FLAGS -march=rv64gc)
    endif()
else()
    # VLLM_RVV_VLEN is 0 / false / undefined: scalar build.
    message(STATUS "compile riscv with scalar")
    set(MARCH_FLAGS -march=rv64gc)
endif()
list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
else() else()
......
This diff is collapsed.
#ifndef CPU_TYPES_RISCV_DEFS_HPP
#define CPU_TYPES_RISCV_DEFS_HPP
// VLEN-to-LMUL mapping for RISC-V Vector extension.
//
// LMUL_<N> expands to the LMUL suffix giving N total bits of vector data:
// VLEN=128: LMUL_128=m1, LMUL_256=m2, LMUL_512=m4, LMUL_1024=m8
// VLEN=256: LMUL_128=mf2, LMUL_256=m1, LMUL_512=m2, LMUL_1024=m4
#include <riscv_vector.h>
// Pick the LMUL and mask-type suffixes for the vector length the compiler
// was told to target. Each LMUL_<N> names the register grouping that holds
// N bits in total; BOOL_256 / BOOL_512 are the mask-type suffixes paired
// with LMUL_256 / LMUL_512 when building mask intrinsic names (see RVVIB).
#if __riscv_v_min_vlen == 256
  // 256-bit registers: 128 bits is half a register, 256 bits is one register.
  #define LMUL_128 mf2
  #define LMUL_256 m1
  #define LMUL_512 m2
  #define LMUL_1024 m4
  #define BOOL_256 b32
  #define BOOL_512 b16
#elif __riscv_v_min_vlen == 128
  // 128-bit registers: 128 bits is one register, 1024 bits is a group of 8.
  #define LMUL_128 m1
  #define LMUL_256 m2
  #define LMUL_512 m4
  #define LMUL_1024 m8
  #define BOOL_256 b16
  #define BOOL_512 b8
#else
  // No mapping exists for any other vector length; stop the build here.
  #error "cpu_types_riscv_defs.hpp: unsupported __riscv_v_min_vlen"
#endif
// Token-paste helpers.
//
// The _RVV_P<n> macros concatenate their arguments with ##. They are never
// meant to be called with LMUL_* / BOOL_* directly: arguments that are
// operands of ## are not macro-expanded, so the public wrappers below
// (RVVTYPE, RVVI, RVVI3, RVVI4) forward to them, which lets LMUL_* expand to
// its suffix (m1, m2, mf2, ...) before the paste happens.
//
// Example (VLEN=128): RVVTYPE(vfloat32, LMUL_256, _t) -> vfloat32m2_t
//
// NOTE(review): identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation in C++; consider
// renaming the _RVV_* helpers if no other file depends on them.
#define _RVV_P2(a, b) a##b
#define _RVV_P3(a, b, c) a##b##c
#define _RVV_P4(a, b, c, d) a##b##c##d
// Build a type name from base + LMUL suffix + trailing suffix (e.g. "_t").
#define RVVTYPE(base, lmul, suffix) _RVV_P3(base, lmul, suffix)
// Build an identifier from base + LMUL suffix.
#define RVVI(base, lmul) _RVV_P2(base, lmul)
// Build an identifier from base + LMUL suffix + trailing suffix.
#define RVVI3(base, lmul, suffix) _RVV_P3(base, lmul, suffix)
// Build an identifier from four pasted pieces.
#define RVVI4(a, b, c, d) _RVV_P4(a, b, c, d)
// For mask intrinsics: RVVIB(base, LMUL_256, BOOL_256) -> base##m2##_##b16
// (expansion shown for VLEN=128; with VLEN=256 it is base##m1##_##b32).
#define _RVV_PB(base, lmul, btype) base##lmul##_##btype
#define RVVIB(base, lmul, btype) _RVV_PB(base, lmul, btype)
// ---- Semantic fixed-vector typedefs (named by element count) ----
// Naming: fixed_<elem>x<count>_t, where <elem> bits * <count> equals the total
// width in bits. That width appears twice in each typedef: as the LMUL_<N>
// macro that selects the underlying RVV type for the current VLEN, and as the
// argument of riscv_rvv_vector_bits. Keep the two in sync when adding a type.
// float16
typedef RVVTYPE(vfloat16, LMUL_128, _t) fixed_fp16x8_t
__attribute__((riscv_rvv_vector_bits(128)));
typedef RVVTYPE(vfloat16, LMUL_256, _t) fixed_fp16x16_t
__attribute__((riscv_rvv_vector_bits(256)));
// float32
typedef RVVTYPE(vfloat32, LMUL_128, _t) fixed_fp32x4_t
__attribute__((riscv_rvv_vector_bits(128)));
typedef RVVTYPE(vfloat32, LMUL_256, _t) fixed_fp32x8_t
__attribute__((riscv_rvv_vector_bits(256)));
typedef RVVTYPE(vfloat32, LMUL_512, _t) fixed_fp32x16_t
__attribute__((riscv_rvv_vector_bits(512)));
typedef RVVTYPE(vfloat32, LMUL_1024, _t) fixed_fp32x32_t
__attribute__((riscv_rvv_vector_bits(1024)));
// int32
typedef RVVTYPE(vint32, LMUL_256, _t) fixed_i32x8_t
__attribute__((riscv_rvv_vector_bits(256)));
typedef RVVTYPE(vint32, LMUL_512, _t) fixed_i32x16_t
__attribute__((riscv_rvv_vector_bits(512)));
// uint16
typedef RVVTYPE(vuint16, LMUL_128, _t) fixed_u16x8_t
__attribute__((riscv_rvv_vector_bits(128)));
typedef RVVTYPE(vuint16, LMUL_256, _t) fixed_u16x16_t
__attribute__((riscv_rvv_vector_bits(256)));
typedef RVVTYPE(vuint16, LMUL_512, _t) fixed_u16x32_t
__attribute__((riscv_rvv_vector_bits(512)));
// bfloat16
// Only declared when RISCV_BF16_SUPPORT is defined.
#ifdef RISCV_BF16_SUPPORT
typedef RVVTYPE(vbfloat16, LMUL_128, _t) fixed_bf16x8_t
__attribute__((riscv_rvv_vector_bits(128)));
typedef RVVTYPE(vbfloat16, LMUL_256, _t) fixed_bf16x16_t
__attribute__((riscv_rvv_vector_bits(256)));
typedef RVVTYPE(vbfloat16, LMUL_512, _t) fixed_bf16x32_t
__attribute__((riscv_rvv_vector_bits(512)));
#endif
// ---- Reduction accumulator type (always m1 = one register of f32) ----
// Used for scalar reductions; only element [0] is meaningful.
// Unlike the fixed_* typedefs, the width is not hard-coded: the attribute
// takes __riscv_v_min_vlen, so the type is 128 or 256 bits wide depending on
// the VLEN being compiled for.
typedef vfloat32m1_t rvv_f32_accum_t
__attribute__((riscv_rvv_vector_bits(__riscv_v_min_vlen)));
// ---- Mask types for f32 elements ----
// rvv_mask_f32x8_t / rvv_mask_f32x16_t are the mask types for 8 and 16 f32
// elements (256 and 512 bits of data). The vbool<N>_t chosen per VLEN uses
// the same N as the BOOL_256 / BOOL_512 macros defined earlier in this
// header; keep the two tables in sync.
// There is no #else branch here: any other __riscv_v_min_vlen has already
// been rejected by the #error in the LMUL mapping.
#if __riscv_v_min_vlen == 128
typedef vbool16_t rvv_mask_f32x8_t;
typedef vbool8_t rvv_mask_f32x16_t;
#elif __riscv_v_min_vlen == 256
typedef vbool32_t rvv_mask_f32x8_t;
typedef vbool16_t rvv_mask_f32x16_t;
#endif
#endif // CPU_TYPES_RISCV_DEFS_HPP
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment