Commit e3b8cb11 authored by jerrrrry

Upload New File

parent c4a52ad6
# Expose all eight local GPUs to the serving process.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# ROCm/RCCL communication tuning.
export HSA_FORCE_FINE_GRAIN_PCIE=1   # use fine-grained PCIe memory for peer access
export NCCL_MAX_NCHANNELS=16         # pin the RCCL/NCCL channel count to 16
export NCCL_MIN_NCHANNELS=16
export NCCL_P2P_LEVEL=SYS            # allow P2P even across the CPU interconnect
export NCCL_LAUNCH_MODE=GROUP        # grouped (cooperative) collective launch
export ROCBLAS_COMPUTETYPE_FP16R=0   # rocBLAS FP16 (real) compute-type setting

# Prefer the libraries bundled with the PyTorch installation.
export LD_LIBRARY_PATH=/usr/local/lib/python3.10/site-packages/torch/lib/:$LD_LIBRARY_PATH

# Bind each local vLLM worker rank to a specific NUMA node.
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=3
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=1
export VLLM_RANK3_NUMA=0
export VLLM_RANK4_NUMA=7
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=5
export VLLM_RANK7_NUMA=4
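# The per-rank NUMA values above are host-specific. A hedged way to check the
# GPU-to-NUMA mapping on a given machine (assumes numactl and the usual sysfs
# PCIe attributes are available):
#
#   numactl --hardware
#   for d in /sys/class/drm/card[0-7]/device; do
#       echo "$d -> NUMA node $(cat "$d/numa_node")"
#   done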
# Launch the OpenAI-compatible server across 32 tensor-parallel ranks via Ray.
vllm serve /workspace/llms/bf16_model/ \
    --trust-remote-code \
    --distributed-executor-backend ray \
    --dtype bfloat16 \
    --max-model-len 24000 \
    --max-seq-len-to-capture 24000 \
    -tp 32 \
    --gpu-memory-utilization 0.9 \
    --max-num-seqs 128 \
    --speculative_config '{"num_speculative_tokens": 1}' \
    --block-size 64 \
    --disable-log-requests
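# Smoke test (run from a separate shell once the server reports it is ready).
# A minimal sketch, assuming the default OpenAI-compatible endpoint on port
# 8000 and that the served model name defaults to the model path above:
#
#   curl -s http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "/workspace/llms/bf16_model/", "prompt": "Hello", "max_tokens": 16}'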