#!/bin/bash
#
# Launches a TRL vLLM server for llama3/Meta-Llama-3-70B-Instruct with
# tensor parallelism across 8 devices, listening on port 8001.
#
# Every statement must be on its own line: anything that shares a line with
# the shebang (or follows a '#') is treated as a comment and never executes.

# --- Device selection / runtime toggles -------------------------------------
export DISABLE_VERSION_CHECK=1
# Eight devices are exposed, matching --tensor-parallel-size 8 below.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# NOTE(review): the three toggles below are platform-specific; their exact
# semantics are not visible from this script -- confirm against vendor docs.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export LLAMA_NN=0

# --- Model hub ---------------------------------------------------------------
# Use a mirror endpoint instead of the default Hugging Face hub URL.
export HF_ENDPOINT=https://hf-mirror.com

# --- vLLM --------------------------------------------------------------------
export VLLM_MLA_DISABLE=0
export VLLM_USE_FLASH_MLA=1

# --- Collective communication (NCCL-style variables) -------------------------
# NOTE(review): HIP_VISIBLE_DEVICES suggests a ROCm host, so these are
# presumably consumed by RCCL -- confirm.
export NCCL_SOCKET_IFNAME=ibp58s0
export NCCL_DEBUG=INFO
export NCCL_ALGO=Ring
export NCCL_PROTO=Simple
# Min and max are pinned to the same value, fixing the channel count at 32.
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_MIN_P2P_NCHANNELS=32
export NCCL_MAX_P2P_NCHANNELS=32
export NCCL_NCHANNELS_PER_PEER=32

# --- Timeouts ----------------------------------------------------------------
# NOTE(review): VLLM_RPC_TIMEOUT is presumably in milliseconds (30 min) --
# confirm against the vLLM environment-variable documentation.
export VLLM_RPC_TIMEOUT=1800000
export NCCL_IB_TIMEOUT=30

# Intentionally disabled; uncomment to override the worker start method.
# export VLLM_WORKER_MULTIPROC_METHOD="spawn"

# --- Server ------------------------------------------------------------------
# Last command in the script, so its exit status becomes the script's status.
trl vllm-serve \
  --model llama3/Meta-Llama-3-70B-Instruct \
  --tensor-parallel-size 8 \
  --gpu_memory_utilization 0.8 \
  --port 8001