Add VLLM_USE_CUDA_GRAPH_SIZES (set to 1) to use CUDA graph sizes 1-24... (not only 1, 2, 4, 8, 16)
set VLLM_USE_LIGHTOP_FILL_MOE_ALIGN=1, VLLM_USE_OPT_ZEROS=1 and VLLM_USE_PP_SYNC=1
Showing
Please register or sign in to comment
set VLLM_USE_LIGHTOP_FILL_MOE_ALIGN=1, VLLM_USE_OPT_ZEROS=1 and VLLM_USE_PP_SYNC=1