launch_tgi_server.sh 464 Bytes
Newer Older
laibao's avatar
laibao committed
1
2
3
4
5
6
7
8
#!/bin/bash
#
# Launch a Hugging Face text-generation-inference (TGI) server in Docker.
#
# Usage: launch_tgi_server.sh MODEL TOKENS
#   MODEL   value handed to the launcher as --model-id
#   TOKENS  value handed to the launcher as --max-batch-total-tokens
#           (must be a non-negative integer)
#
# Environment:
#   HF_TOKEN  forwarded into the container when present in the caller's
#             environment.
#
# Container port 80 is published on host port $PORT, and the "data"
# directory under the current working directory is mounted at /data.

set -euo pipefail

PORT=8000

# Fail early with a usage message instead of letting docker/TGI choke on a
# command line with missing option values.
if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s MODEL TOKENS\n' "${0##*/}" >&2
  exit 2
fi

MODEL=$1
TOKENS=$2

if ! [[ "$TOKENS" =~ ^[0-9]+$ ]]; then
  printf 'error: TOKENS must be a non-negative integer, got: %s\n' "$TOKENS" >&2
  exit 2
fi

# Options for `docker run` itself.
docker_args=(
  # Name-only form: docker copies HF_TOKEN from this process's environment,
  # so the secret never appears on the command line (visible via `ps`).
  -e HF_TOKEN
  --gpus all
  --shm-size 1g
  -p "${PORT}:80"
  -v "${PWD}/data:/data"
)

# Options passed through to the TGI launcher inside the container.
launcher_args=(
  --model-id "$MODEL"
  --sharded false
  --max-input-length 1024
  --max-total-tokens 2048
  --max-best-of 5
  --max-concurrent-requests 5000
  --max-batch-total-tokens "$TOKENS"
)

docker run "${docker_args[@]}" \
  ghcr.io/huggingface/text-generation-inference:2.2.0 \
  "${launcher_args[@]}"