run_perf_client.sh 2.58 KB
Newer Older
hepj987's avatar
hepj987 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash

# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. 

# Positional arguments. Every one is optional: an omitted or empty argument
# falls back to the default given here.
MODEL_NAME="${1:-bert}"                              # $1  model name (perf_client -m)
MODEL_VERSION="${2:-1}"                              # $2  model version (perf_client -x)
precision="${3:-fp32}"                               # $3  precision; not referenced again in this script
BATCH_SIZE="${4:-1}"                                 # $4  batch size (perf_client -b)
MAX_LATENCY="${5:-500}"                              # $5  latency limit (perf_client -l)
MAX_CLIENT_THREADS="${6:-10}"                        # $6  perf_client --max-threads
MAX_CONCURRENCY="${7:-50}"                           # $7  concurrency (perf_client -c)
SERVER_HOSTNAME="${8:-localhost}"                    # $8  server host, without a port
DOCKER_BRIDGE="${9:-host}"                           # $9  forwarded to the docker launch scripts
RESULTS_ID="${10:-}"                                 # $10 optional tag added to the results file name
PROFILING_DATA="${11:-triton/profiling_data_int64}"  # $11 perf_client --input-data
NV_VISIBLE_DEVICES="${12:-0}"                        # $12 forwarded to triton/launch_triton_server.sh

# Reject any hostname containing a colon: this script appends the port itself,
# so only a bare hostname is accepted.
case "$SERVER_HOSTNAME" in
  *:*)
    echo "ERROR! Do not include the port when passing the Server Hostname. These scripts require that the TRITON HTTP endpoint is on Port 8000 and the gRPC endpoint is on Port 8001. Exiting..."
    exit 1
    ;;
esac

# When the target is localhost, make sure a Triton server container named
# trt_server_cont is running. If this script has to start it, it also
# registers a handler that kills the container when the script exits.
if [[ "$SERVER_HOSTNAME" == "localhost" ]]; then
    # The container counts as "not running" unless docker inspect prints
    # exactly "true"; that also covers the case where inspect itself fails.
    if [[ "$(docker inspect -f "{{.State.Running}}" trt_server_cont)" != "true" ]]; then

        echo "Launching TRITON server"
        # Quote the arguments so they reach the launch script as single words,
        # and stop right away if the launch fails instead of going on to wait
        # for a server that was never started.
        if ! bash triton/launch_triton_server.sh "${DOCKER_BRIDGE}" "--NV_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES}"; then
            echo "ERROR! Failed to launch the TRITON server. Exiting..." >&2
            exit 1
        fi
        SERVER_LAUNCHED=true

        # Kills the server container that was started above.
        cleanup_server() {
            docker kill trt_server_cont
        }

        # Ensure we cleanup the server on exit
        # trap "exit" INT TERM
        trap cleanup_server EXIT
    fi
fi

# Block until the server is ready. The helper script is expected to poll the
# server's health endpoint (its body is not part of this file).
bash triton/wait_for_triton_server.sh "$SERVER_HOSTNAME"

# yymmdd_HHMM stamp used in the results file name.
TIMESTAMP=$(date "+%y%m%d_%H%M")

# Create model directory on host (directory /results is mounted)
bash scripts/docker/launch.sh "mkdir -p /results/perf_client/${MODEL_NAME}"

# A non-empty results ID is inserted into the file name with a leading
# underscore; an empty one leaves the name unchanged.
if [[ -n "${RESULTS_ID}" ]]; then
    RESULTS_ID="_${RESULTS_ID}"
fi

OUTPUT_FILE_CSV="/results/perf_client/${MODEL_NAME}/results${RESULTS_ID}_${TIMESTAMP}.csv"

# Options handed to perf_client. They are kept as one flat string because the
# whole command is passed to scripts/docker/launch.sh as a single argument.
# The backslash-newlines inside the quotes are line continuations, so ARGS
# contains no newlines; consecutive options end up separated by four spaces.
# The server is addressed over gRPC on port 8001.
ARGS="\
   --max-threads ${MAX_CLIENT_THREADS} \
   -m ${MODEL_NAME} \
   -x ${MODEL_VERSION} \
   -p 3000 \
   -d \
   -v \
   -i gRPC \
   -u ${SERVER_HOSTNAME}:8001 \
   -b ${BATCH_SIZE} \
   -l ${MAX_LATENCY} \
   -c ${MAX_CONCURRENCY} \
   -f ${OUTPUT_FILE_CSV} \
   --input-data ${PROFILING_DATA}"

# Show the options one per line. Pattern substitution turns every
# "three spaces + dash" into "newline + dash" inside the shell, without
# depending on how the local sed treats \n in a replacement.
pretty_args=${ARGS//   -/$'\n'-}
printf 'Using args:  %s\n' "$pretty_args"

# Run perf_client through the docker launcher; this is the last command, so
# its exit status becomes the exit status of the script.
bash scripts/docker/launch.sh "/workspace/install/bin/perf_client $ARGS" all "$DOCKER_BRIDGE"