benchmark.sh 2.09 KB
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Positional arguments, in order:
#   $1  MODEL_REPO        directory passed to the deployer's --save-dir
#   $2  OUTPUT            directory that receives the per-run CSV files
#   $3  MODEL_ARCH        value passed to the deployer after "--config"
#   $4  MODEL_CHECKPOINT  checkpoint path (not referenced elsewhere in the
#                         visible part of this script)
MODEL_REPO="$1"
OUTPUT="$2"
MODEL_ARCH="$3"
MODEL_CHECKPOINT="$4"

# Substitute the default whenever an argument was omitted or given as "".
: "${MODEL_REPO:=/repo}"
: "${OUTPUT:=/logs}"
: "${MODEL_ARCH:=resnet50}"
: "${MODEL_CHECKPOINT:=/checkpoint.pth}"

# Export-and-benchmark sweep.
#
# For every backend (ts, onnx, trt) and every precision (16, 32):
#   1. export the model into ${MODEL_REPO} with triton.deployer,
#   2. run perf_client over a range of concurrency levels,
#   3. collect all measurements in ${OUTPUT}/<backend>_dynamic_<precision>.csv,
#   4. remove the exported model directory again.
# After each backend, every *_dynamic_*.csv found in ${OUTPUT} is printed.
#
# Reads: MODEL_REPO, OUTPUT, MODEL_ARCH.

# perf_client cannot write its CSV into a directory that does not exist.
mkdir -p -- "${OUTPUT}"

# Scratch file for the per-concurrency runs. A regular file plus a synchronous
# `tail` replaces the former `-f >(tail ... >> csv)` process substitution:
# bash does not wait for a process substitution, so the append could still be
# in flight when the next run, the final `cat`, or script exit happened.
sweep_csv=$(mktemp) || exit 1
trap 'rm -f -- "${sweep_csv}"' EXIT

for backend in ts onnx trt; do
    # The "ts" backend is exported through the deployer's --ts-script switch;
    # the other backends use a switch named after the backend itself.
    if [[ "${backend}" == "ts" ]]; then
        EXPORT_NAME="ts-script"
    else
        EXPORT_NAME="${backend}"
    fi

    for precision in 16 32; do
        # Flags are kept in arrays so that an empty set expands to zero words
        # and every expansion below can be quoted safely.
        if (( precision == 16 )); then
            model_flags=(--fp16)
            deployer_flags=(--trt-fp16 --max_workspace_size 2147483648)
        else
            model_flags=()
            deployer_flags=()
        fi

        model_name="model_${backend}"
        result_csv="${OUTPUT}/${backend}_dynamic_${precision}.csv"

        echo "Exporting model as ${EXPORT_NAME} with precision ${precision}"

        # Arguments after the bare "--" are the model-side arguments.
        # A failed export is reported but does not stop the sweep.
        python -m triton.deployer "--${EXPORT_NAME}" --triton-model-name "${model_name}" --triton-max-batch-size 64 \
            --triton-engine-count 2 --save-dir "${MODEL_REPO}" "${deployer_flags[@]}" -- --config "${MODEL_ARCH}" "${model_flags[@]}" \
            || echo "warning: export of ${model_name} (precision ${precision}) exited with an error" >&2
        # Presumably gives the inference server time to pick up the freshly
        # exported model before it is benchmarked -- TODO confirm.
        sleep 30

        # Options shared by every perf_client invocation for this model.
        perf_args=(--max-threads 10 -m "${model_name}" -x 1 -p 10000 -v -i gRPC -u localhost:8001 -b 1 -l 5000)

        # First run (concurrency 1..2) creates/overwrites the result CSV.
        /workspace/bin/perf_client "${perf_args[@]}" --concurrency-range 1:2 -f "${result_csv}"

        # Remaining levels run one at a time; each run's output is appended
        # without its first line (presumably the CSV header row).
        for concurrency_level in 4 8 16 32 64 128 256; do
            : > "${sweep_csv}"   # never re-append stale rows if a run writes nothing
            /workspace/bin/perf_client "${perf_args[@]}" \
                --concurrency-range "${concurrency_level}:${concurrency_level}" -f "${sweep_csv}"
            tail -n +2 -- "${sweep_csv}" >> "${result_csv}"
        done

        # ${VAR:?} aborts instead of deleting "/model_<backend>" should the
        # repository path ever be empty.
        rm -rf -- "${MODEL_REPO:?}/${model_name}"
    done
    cat -- "${OUTPUT}"/*_dynamic_*.csv
done