Unverified Commit c3af4472 authored by Kuntai Du's avatar Kuntai Du Committed by GitHub
Browse files

[Doc]Add documentation to benchmarking script when running TGI (#4920)

parent 1937e298
......@@ -17,6 +17,10 @@ On the client side, run:
--dataset-path <path to dataset> \
--request-rate <request_rate> \ # By default <request_rate> is inf
--num-prompts <num_prompts> # By default <num_prompts> is 1000
when using tgi backend, add
--endpoint /generate_stream
to the end of the command above.
"""
import argparse
import asyncio
......
......@@ -4,7 +4,7 @@ PORT=8000
MODEL=$1
TOKENS=$2
docker run --gpus all --shm-size 1g -p $PORT:80 \
docker run -e HF_TOKEN=$HF_TOKEN --gpus all --shm-size 1g -p $PORT:80 \
-v $PWD/data:/data \
ghcr.io/huggingface/text-generation-inference:1.4.0 \
--model-id $MODEL \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment