Unverified Commit c3af4472 authored by Kuntai Du's avatar Kuntai Du Committed by GitHub
Browse files

[Doc] Add documentation to benchmarking script when running TGI (#4920)

parent 1937e298
...@@ -17,6 +17,10 @@ On the client side, run: ...@@ -17,6 +17,10 @@ On the client side, run:
--dataset-path <path to dataset> \ --dataset-path <path to dataset> \
--request-rate <request_rate> \ # By default <request_rate> is inf --request-rate <request_rate> \ # By default <request_rate> is inf
--num-prompts <num_prompts> # By default <num_prompts> is 1000 --num-prompts <num_prompts> # By default <num_prompts> is 1000
When using the TGI backend, add
--endpoint /generate_stream
to the end of the command above.
""" """
import argparse import argparse
import asyncio import asyncio
......
...@@ -4,7 +4,7 @@ PORT=8000 ...@@ -4,7 +4,7 @@ PORT=8000
MODEL=$1 MODEL=$1
TOKENS=$2 TOKENS=$2
docker run --gpus all --shm-size 1g -p $PORT:80 \ docker run -e HF_TOKEN=$HF_TOKEN --gpus all --shm-size 1g -p $PORT:80 \
-v $PWD/data:/data \ -v $PWD/data:/data \
ghcr.io/huggingface/text-generation-inference:1.4.0 \ ghcr.io/huggingface/text-generation-inference:1.4.0 \
--model-id $MODEL \ --model-id $MODEL \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment