#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

#######################################
# Poll a model server's /health endpoint until it reports healthy.
# Arguments:
#   $1 - model host
#   $2 - model port
#   $3 - seconds to sleep between polls (default: 1)
#   $4 - overall timeout in seconds (default: 600)
#   $5 - minimum seconds between progress messages (default: 60)
# Outputs:
#   Progress and result messages on stdout.
# Returns:
#   0 once the response contains "status":"healthy" (whitespace around the
#   colon is tolerated). On timeout the function calls `exit 2`, which
#   terminates the calling shell, not just this function.
#######################################
wait_for_model() {
  local model_host=$1
  local model_port=$2
  local poll=${3:-1}
  local timeout=${4:-600}
  local report_every=${5:-60}

  local health_addr="http://${model_host}:${model_port}/health"
  # Accept both compact and pretty-printed JSON.
  local healthy_re='"status"[[:space:]]*:[[:space:]]*"healthy"'
  local curl_result time_now start_ts report_ts

  echo "Polling ${health_addr} every ${poll} seconds"

  start_ts=$(date +%s)
  report_ts=${start_ts}

  while :; do
    # A refused or failed connection is expected while the server starts, so
    # the failure is swallowed on purpose (this also keeps callers running
    # under `set -e` alive). --max-time stops one hung request from blocking
    # far past the overall timeout.
    curl_result=$(curl --silent --max-time "${timeout}" "${health_addr}" 2>/dev/null) || true

    if [[ ${curl_result} =~ ${healthy_re} ]]; then
      echo "Model is alive. Health response: ${curl_result}; "
      return 0
    fi

    time_now=$(date +%s)
    if (( time_now - start_ts >= timeout )); then
      echo "Model did not get healthy in ${timeout} seconds"
      exit 2
    fi

    if (( time_now - report_ts >= report_every )); then
      echo "Waiting for model to come alive. Current result: ${curl_result}"
      report_ts=${time_now}
    fi

    sleep "${poll}"
  done
}

#######################################
# Run sglang.bench_serving once against a running service as a warmup.
# Arguments:
#   $1 - service host
#   $2 - service port
#   $3 - served model name (passed to --model)
#   $4 - model path (passed to --tokenizer)
#   $5 - config string with five 'x'-separated fields:
#        <isl>x<osl>x<num_prompts>x<concurrency>x<request_rate>
# Outputs:
#   The command being run on stdout, followed by the benchmark's own output.
# Returns:
#   1 if the config string has fewer than five fields; otherwise the exit
#   status of the benchmark command.
#######################################
warmup_model() {
  local service_host=$1
  local service_port=$2
  local served_model_name=$3
  local model_path=$4
  local config=$5

  local -a config_list=()
  IFS='x' read -r -a config_list <<< "${config}"
  if (( ${#config_list[@]} < 5 )); then
    printf 'warmup_model: config "%s" must have 5 x-separated fields\n' \
      "${config}" >&2
    return 1
  fi

  local isl=${config_list[0]}
  local osl=${config_list[1]}
  local num_prompts=${config_list[2]}
  local concurrency=${config_list[3]}
  local request_rate=${config_list[4]}

  # Built as an array and expanded quoted so that model names or paths
  # containing spaces or glob characters reach the benchmark as single args.
  local -a command=(
    python3 -m sglang.bench_serving
    --base-url "http://${service_host}:${service_port}"
    --model "${served_model_name}"
    --tokenizer "${model_path}"
    --backend sglang-oai
    --dataset-name random
    --random-input "${isl}"
    --random-output "${osl}"
    --random-range-ratio 1
    --num-prompts "${num_prompts}"
    --request-rate "${request_rate}"
    --max-concurrency "${concurrency}"
  )

  echo "Config ${config}. Running command ${command[*]}"
  "${command[@]}"
}