#!/usr/bin/env bash
# Throughput benchmark for Qwen1.5-0.5B-Chat on vLLM 0.7.2 (ROCm).
# Sweeps every combination of num-prompts x input-len x output-len and
# writes one JSON result file per combination.
#
# NOTE(review): the original one-liner passed all of these words to
# `export`, so `--model` etc. were parsed as (invalid) variable names and
# the benchmark never launched; it also passed the whole "1 2"-style
# sweep lists to single-int argparse flags. Reconstructed as the sweep
# the plural values imply — confirm against the intended test plan.
set -euo pipefail

# Pin the run to GPU index 1 (ROCm equivalent of CUDA_VISIBLE_DEVICES).
export HIP_VISIBLE_DEVICES=1

readonly tp=1
readonly model_path=/llm-models/qwen1.5/Qwen1.5-0.5B-Chat
# Space-separated sweep lists; iterated below.
readonly batch="1 2"
readonly prompt_tokens="16 64"
readonly completion_tokens="128 256"

# Unquoted list expansions are intentional: word-splitting drives the sweep.
for num_prompts in $batch; do
  for input_len in $prompt_tokens; do
    for output_len in $completion_tokens; do
      # Unique output file per combination — a fixed name would be
      # overwritten on every iteration.
      python benchmark_throughput_0.7.2.py \
        --model "$model_path" \
        --tensor-parallel-size "$tp" \
        --num-prompts "$num_prompts" \
        --input-len "$input_len" \
        --output-len "$output_len" \
        --dtype float16 \
        --trust-remote-code \
        --max-model-len 32768 \
        --output-json "./test_0.5B-0.7.2_np${num_prompts}_in${input_len}_out${output_len}.json"
    done
  done
done