#!/usr/bin/env bash
#
# Launch script: exports the runtime environment below, then starts
# `vllm serve` for the model stored at /data2/models/qwen3-8B.
#
# NOTE(review): each statement must stay on its own line. If the file is
# collapsed onto a single line, the first `#` comment swallows every export
# after it and the `vllm serve` command itself.

# Per-rank NUMA settings (ranks 0-3 are all set to 0).
# NOTE(review): only two devices are made visible further down, so the
# rank 2/3 entries are presumably unused for this launch -- confirm.
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=0
export VLLM_RANK2_NUMA=0
export VLLM_RANK3_NUMA=0

# Device / collective-communication settings.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16
export NCCL_P2P_LEVEL=SYS
export NCCL_LAUNCH_MODE=GROUP
export ALLREDUCE_STREAM_WITH_COMPUTE=1

# vLLM runtime settings.
export VLLM_RPC_TIMEOUT=1800000
export VLLM_ZERO_OVERHEAD=1
export VLLM_ZERO_OPT_ZEROS=1

# Environment variables required for testing Qwen3-30B-A3B.
export VLLM_USE_FUSED_RMS_ROPE=1
export VLLM_USE_MARLIN_W16A16_MOE=1

# Environment variables required for testing Qwen3-Next.
export VLLM_USE_NN=0
export TRITON_MOVE_LOAD_TOFRONT_DOT=0

# Two devices are exposed, matching --tensor-parallel-size 2 below.
export HIP_VISIBLE_DEVICES=6,7

vllm serve /data2/models/qwen3-8B \
  --served-model-name qwen3-8B \
  --host 0.0.0.0 \
  --port 8000 \
  --trust-remote-code \
  --tensor-parallel-size 2