#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

#######################################
# Poll the model server's /health endpoint until the health checker reports
# that the model is ready, or until the timeout elapses.
#
# The body returned by curl is piped on stdin to
# /scripts/check_server_health.py, which is invoked with the expected prefill
# and decode counts as arguments. The model is considered ready when the
# checker's output contains the text "Model is ready.".
#
# Arguments:
#   $1 - model host
#   $2 - model port
#   $3 - expected number of prefill workers (default: 1)
#   $4 - expected number of decode workers (default: 1)
#   $5 - polling interval in seconds (default: 1)
#   $6 - overall timeout in seconds (default: 600)
#   $7 - minimum seconds between progress reports (default: 60)
# Outputs:
#   Progress and the final checker output on stdout.
# Returns:
#   0 once the model is ready.
#   On timeout this function calls `exit 2` (it does NOT return), so it
#   terminates the calling shell unless invoked in a subshell.
#######################################
wait_for_model() {
    local model_host=$1
    local model_port=$2
    local n_prefill=${3:-1}
    local n_decode=${4:-1}
    local poll=${5:-1}
    local timeout=${6:-600}
    local report_every=${7:-60}

    local health_addr="http://${model_host}:${model_port}/health"
    echo "Polling ${health_addr} every ${poll} seconds to check whether ${n_prefill} prefills and ${n_decode} decodes are alive"

    # Declared separately from their assignments so that `local` neither masks
    # command exit codes nor lets these names leak into the caller's scope.
    local start_ts report_ts time_now curl_result check_result
    start_ts=$(date +%s)
    report_ts=$start_ts

    while :; do
        # Curl timeout - our primary use case here is to launch it at the first
        # node (localhost), so no timeout is needed. Curl's stderr is discarded
        # on purpose: connection failures are expected while the server boots.
        curl_result=$(curl "${health_addr}" 2>/dev/null)

        # Python path - `check_server_health.py` is referenced by absolute path
        # because it lives outside of any packaging.
        check_result=$(python3 /scripts/check_server_health.py "$n_prefill" "$n_decode" <<< "$curl_result")
        if [[ $check_result == *"Model is ready."* ]]; then
            printf '%s\n' "$check_result"
            return 0
        fi

        time_now=$(date +%s)
        if (( time_now - start_ts >= timeout )); then
            echo "Model did not get healthy in ${timeout} seconds"
            # Intentionally `exit`, not `return`: an unhealthy model aborts the job.
            exit 2
        fi

        # Throttle status output so long waits do not flood the log.
        if (( time_now - report_ts >= report_every )); then
            printf '%s\n' "$check_result"
            report_ts=$time_now
        fi

        sleep "$poll"
    done
}

#######################################
# Warm up a served model by running `python3 -m sglang.bench_serving` with
# random 1024-token inputs/outputs at increasing concurrency levels
# (1, 2, 4, ..., 128), issuing 5 prompts per unit of concurrency.
#
# Arguments (each falls back to its default when omitted or empty):
#   $1 - service host                          (default: localhost)
#   $2 - service port                          (default: 8000)
#   $3 - served model name, passed as --model  (default: deepseek-ai/DeepSeek-R1)
#   $4 - model path, passed as --tokenizer     (default: deepseek-ai/DeepSeek-R1-0528)
#   $5 - config; accepted for interface compatibility, currently unused
# Outputs:
#   One "Running with concurrency: ..." line per level, followed by the
#   benchmark tool's own output.
# Returns:
#   The exit status of the last benchmark invocation.
#######################################
warmup_model() {
    local service_host=${1:-localhost}
    local service_port=${2:-8000}
    local served_model_name=${3:-deepseek-ai/DeepSeek-R1}
    local model_path=${4:-deepseek-ai/DeepSeek-R1-0528}
    # shellcheck disable=SC2034 # kept so the positional interface is unchanged
    local config=${5:-}

    local chosen_isl=1024
    local chosen_osl=1024
    local chosen_req_rate="inf"
    local -a chosen_concurrencies=(1 2 4 8 16 32 64 128)

    local concurrency num_prompts
    local -a command
    for concurrency in "${chosen_concurrencies[@]}"; do
        num_prompts=$((concurrency * 5))
        command=(
            python3 -m sglang.bench_serving
            --base-url "http://${service_host}:${service_port}"
            --model "${served_model_name}"
            --tokenizer "${model_path}"
            --backend sglang-oai
            --dataset-name random
            --random-input "${chosen_isl}"
            --random-output "${chosen_osl}"
            --random-range-ratio 1
            --num-prompts "${num_prompts}"
            --request-rate "${chosen_req_rate}"
            --max-concurrency "${concurrency}"
        )

        echo "Running with concurrency: ${concurrency}, num_prompts: ${num_prompts}"
        "${command[@]}"
    done
}