export_model.sh

#!/bin/bash

# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. 

# Assembles a "python triton/deployer.py ..." command line from the positional
# arguments below and passes it, as one string, to scripts/docker/launch.sh.
#
# Positional arguments (all optional; default shown in parentheses):
#   $1   NV_VISIBLE_DEVICES         2nd argument given to launch.sh ("0")
#   $2   DOCKER_BRIDGE              3rd argument given to launch.sh ("host")
#   $3   checkpoint                 value of --checkpoint
#                                   ("/workspace/bert/checkpoints/bert_qa.pt")
#   $4   batch_size                 value of both --triton-max-batch-size and
#                                   --batch_size ("8")
#   $5   BERT_DIR                   directory containing bert_config.json
#                                   ("/workspace/bert")
#   $6   EXPORT_FORMAT              emitted verbatim as the flag
#                                   "--<EXPORT_FORMAT>" ("ts-script")
#   $7   precision                  "fp16" appends --fp16; any other value
#                                   appends nothing ("fp16")
#   $8   triton_model_version       value of --triton-model-version (1)
#   $9   triton_model_name          value of --triton-model-name
#                                   ("bertQA-ts-script")
#   $10  triton_dyn_batching_delay  value of --triton-dyn-batching-delay (0)
#   $11  triton_engine_count        value of --triton-engine-count (1)
#   $12  triton_model_overwrite     accepted but not used by this script
#                                   ("False")
NV_VISIBLE_DEVICES=${1:-"0"}
DOCKER_BRIDGE=${2:-"host"}
checkpoint=${3:-"/workspace/bert/checkpoints/bert_qa.pt"}
batch_size=${4:-"8"}
BERT_DIR=${5:-"/workspace/bert"}
EXPORT_FORMAT=${6:-"ts-script"}
precision=${7:-"fp16"}
triton_model_version=${8:-1}
triton_model_name=${9:-"bertQA-ts-script"}
triton_dyn_batching_delay=${10:-0}
triton_engine_count=${11:-1}
# Kept so the 12-argument calling convention stays stable; nothing below reads
# it. NOTE(review): confirm whether it was meant to be forwarded somewhere.
# shellcheck disable=SC2034
triton_model_overwrite=${12:-"False"}

# The prediction file and vocab path are fixed; they do not follow BERT_DIR.
PREDICT_FILE="/workspace/bert/data/squad/v1.1/dev-v1.1.json"

DEPLOYER="deployer.py"

# First group of options: export format, output directory and Triton settings.
CMD="python triton/${DEPLOYER} \
    --${EXPORT_FORMAT} \
    --save-dir /results/triton_models \
    --triton-model-name ${triton_model_name} \
    --triton-model-version ${triton_model_version} \
    --triton-max-batch-size ${batch_size} \
    --triton-dyn-batching-delay ${triton_dyn_batching_delay} \
    --triton-engine-count ${triton_engine_count} "

# Second group, placed after a bare "--" separator: checkpoint, config, vocab
# and dataset options. Presumably the deployer hands these on to the model
# code -- confirm in triton/deployer.py.
CMD+="-- --checkpoint ${checkpoint} \
    --config_file ${BERT_DIR}/bert_config.json \
    --vocab_file /workspace/bert/vocab/vocab \
    --predict_file ${PREDICT_FILE} \
    --do_lower_case \
    --batch_size=${batch_size} "

# Only the exact string "fp16" enables the --fp16 flag.
if [[ "${precision}" == "fp16" ]]; then
    CMD+="--fp16 "
fi

# CMD is intentionally passed as ONE argument. The device list and bridge are
# quoted so that a value containing whitespace or glob characters cannot be
# split/expanded and shift launch.sh's positional parameters.
bash scripts/docker/launch.sh "${CMD}" "${NV_VISIBLE_DEVICES}" "${DOCKER_BRIDGE}"