[CI][BugFix] ShellCheck cleanup to remove baseline and preserve runtime behavior (#34514)

Signed-off-by: junuxyz <216036880+junuxyz@users.noreply.github.com>

[CI][BugFix] ShellCheck cleanup to remove baseline and preserve runtime behavior (#34514)
Signed-off-by: junuxyz <216036880+junuxyz@users.noreply.github.com>
c61a98f5 · junuxyz · GitHub · 28bffe94 · c61a98f5 · c61a98f5
Unverified Commit c61a98f5 authored Feb 17, 2026 by junuxyz Committed by GitHub Feb 17, 2026
20 changed files
--- a/.buildkite/scripts/tpu/docker_run_bm.sh
+++ b/.buildkite/scripts/tpu/docker_run_bm.sh
@@ -9,10 +9,11 @@ ENV_FILE=$1

 # For testing on local vm, use `set -a` to export all variables
 source /etc/environment
-source $ENV_FILE
+# shellcheck source=/dev/null
+source "$ENV_FILE"

 remove_docker_container() { 
-    docker rm -f $CONTAINER_NAME || true;
+    docker rm -f "$CONTAINER_NAME" || true;
 }

 trap remove_docker_container EXIT
@@ -41,13 +42,13 @@ echo
 echo "starting docker...$CONTAINER_NAME"
 echo    
 docker run \
- -v $DOWNLOAD_DIR:$DOWNLOAD_DIR \
- --env-file $ENV_FILE \
+ -v "$DOWNLOAD_DIR":"$DOWNLOAD_DIR" \
+ --env-file "$ENV_FILE" \
 -e HF_TOKEN="$HF_TOKEN" \
- -e TARGET_COMMIT=$BUILDKITE_COMMIT \
- -e MODEL=$MODEL \
+ -e TARGET_COMMIT="$BUILDKITE_COMMIT" \
+ -e MODEL="$MODEL" \
 -e WORKSPACE=/workspace \
- --name $CONTAINER_NAME \
+ --name "$CONTAINER_NAME" \
 -d \
 --privileged \
 --network host \

--- a/.buildkite/scripts/tpu/run_bm.sh
+++ b/.buildkite/scripts/tpu/run_bm.sh
@@ -42,21 +42,21 @@ echo "lanching vllm..."
 echo "logging to $VLLM_LOG"
 echo

-vllm serve $MODEL \
+vllm serve "$MODEL" \
 --seed 42 \
- --max-num-seqs $MAX_NUM_SEQS \
- --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
- --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
+ --max-num-seqs "$MAX_NUM_SEQS" \
+ --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
+ --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
 --no-enable-prefix-caching \
- --download_dir $DOWNLOAD_DIR \
- --max-model-len $MAX_MODEL_LEN > "$VLLM_LOG" 2>&1 &
+ --download_dir "$DOWNLOAD_DIR" \
+ --max-model-len "$MAX_MODEL_LEN" > "$VLLM_LOG" 2>&1 &


 echo "wait for 20 minutes.."
 echo
 # sleep 1200
 # wait for 10 minutes...
-for i in {1..120}; do
+for _ in {1..120}; do
    # TODO: detect other type of errors.
    if grep -Fq "raise RuntimeError" "$VLLM_LOG"; then
        echo "Detected RuntimeError, exiting."
@@ -78,11 +78,11 @@ echo "logging to $BM_LOG"
 echo
 vllm bench serve \
    --backend vllm \
-    --model $MODEL  \
+    --model "$MODEL"  \
    --dataset-name sonnet \
    --dataset-path benchmarks/sonnet_4x.txt \
-    --sonnet-input-len $INPUT_LEN \
-    --sonnet-output-len $OUTPUT_LEN \
+    --sonnet-input-len "$INPUT_LEN" \
+    --sonnet-output-len "$OUTPUT_LEN" \
    --ignore-eos > "$BM_LOG"

 echo "completed..."

--- a/.buildkite/scripts/upload-nightly-wheels.sh
+++ b/.buildkite/scripts/upload-nightly-wheels.sh
@@ -76,16 +76,15 @@ mkdir -p "$INDICES_OUTPUT_DIR"
 # this indices have relative paths that could work as long as it is next to the wheel directory in s3
 # i.e., the wheels are always in s3://vllm-wheels/<commit>/
 # and indices can be placed in /<commit>/, or /nightly/, or /<version>/
-if [[ ! -z "$DEFAULT_VARIANT_ALIAS" ]]; then
-    alias_arg="--alias-to-default $DEFAULT_VARIANT_ALIAS"
-else
-    alias_arg=""
+alias_args=()
+if [[ -n "$DEFAULT_VARIANT_ALIAS" ]]; then
+    alias_args=(--alias-to-default "$DEFAULT_VARIANT_ALIAS")
 fi

 # HACK: we do not need regex module here, but it is required by pre-commit hook
 # To avoid any external dependency, we simply replace it back to the stdlib re module
 sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py
-$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" $alias_arg
+$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" "${alias_args[@]}"

 # copy indices to /<commit>/ unconditionally
 echo "Uploading indices to $S3_COMMIT_PREFIX"
@@ -100,9 +99,9 @@ fi
 # re-generate and copy to /<pure_version>/ only if it does not have "dev" in the version
 if [[ "$version" != *"dev"* ]]; then
    echo "Re-generating indices for /$pure_version/"
-    rm -rf "$INDICES_OUTPUT_DIR/*"
+    rm -rf "${INDICES_OUTPUT_DIR:?}/*"
    mkdir -p "$INDICES_OUTPUT_DIR"
    # wheel-dir is overridden to be the commit directory, so that the indices point to the correct wheel path
-    $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" $alias_arg
+    $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" "${alias_args[@]}"
    aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/"
 fi
--- a/.buildkite/scripts/upload-release-wheels-pypi.sh
+++ b/.buildkite/scripts/upload-release-wheels-pypi.sh
@@ -7,7 +7,7 @@ SUBPATH=$BUILDKITE_COMMIT
 S3_COMMIT_PREFIX="s3://$BUCKET/$SUBPATH/"

 RELEASE_VERSION=$(buildkite-agent meta-data get release-version)
-GIT_VERSION=$(git describe --exact-match --tags $BUILDKITE_COMMIT 2>/dev/null)
+GIT_VERSION=$(git describe --exact-match --tags "$BUILDKITE_COMMIT" 2>/dev/null)

 echo "Release version from Buildkite: $RELEASE_VERSION"

@@ -55,7 +55,7 @@ mkdir -p $DIST_DIR
 aws s3 cp --recursive --exclude "*" --include "vllm-${PURE_VERSION}*.whl" --exclude "*dev*" --exclude "*rc[0-9]*" "$S3_COMMIT_PREFIX" $DIST_DIR
 echo "Wheels copied to local directory"
 # generate source tarball
-git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" $BUILDKITE_COMMIT
+git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" "$BUILDKITE_COMMIT"
 ls -la $DIST_DIR

 # upload wheels to PyPI (only default variant, i.e. files without '+' in the name)
@@ -65,6 +65,6 @@ if [[ -z "$PYPI_WHEEL_FILES" ]]; then
  exit 1
 fi

-python3 -m twine check $PYPI_WHEEL_FILES
-python3 -m twine upload --non-interactive --verbose $PYPI_WHEEL_FILES
+python3 -m twine check "$PYPI_WHEEL_FILES"
+python3 -m twine upload --non-interactive --verbose "$PYPI_WHEEL_FILES"
 echo "Wheels uploaded to PyPI"
--- a/.buildkite/scripts/upload-rocm-wheels.sh
+++ b/.buildkite/scripts/upload-rocm-wheels.sh
@@ -55,7 +55,7 @@ mkdir -p all-rocm-wheels
 cp artifacts/rocm-base-wheels/*.whl all-rocm-wheels/ 2>/dev/null || true
 cp artifacts/rocm-vllm-wheel/*.whl all-rocm-wheels/ 2>/dev/null || true

-WHEEL_COUNT=$(ls all-rocm-wheels/*.whl 2>/dev/null | wc -l)
+WHEEL_COUNT=$(find all-rocm-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l)
 echo "Total wheels to upload: $WHEEL_COUNT"

 if [ "$WHEEL_COUNT" -eq 0 ]; then
@@ -115,7 +115,7 @@ if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]] |
 fi

 # Extract version from vLLM wheel and update version-specific index
-VLLM_WHEEL=$(ls all-rocm-wheels/vllm*.whl 2>/dev/null | head -1)
+VLLM_WHEEL=$(find all-rocm-wheels -maxdepth 1 -name 'vllm*.whl' 2>/dev/null | head -1)
 if [ -n "$VLLM_WHEEL" ]; then
    VERSION=$(unzip -p "$VLLM_WHEEL" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2)
    echo "Version in wheel: $VERSION"

--- a/benchmarks/auto_tune/auto_tune.sh
+++ b/benchmarks/auto_tune/auto_tune.sh
@@ -46,10 +46,10 @@ echo "VLLM_LOGGING_LEVEL=$VLLM_LOGGING_LEVEL"
 echo "RESULT_FILE=$RESULT"
 echo "====================== AUTO TUNEPARAMETERS ===================="

-rm -rf $LOG_FOLDER
-rm -rf $PROFILE_PATH
-mkdir -p $LOG_FOLDER
-mkdir -p $PROFILE_PATH
+rm -rf "$LOG_FOLDER"
+rm -rf "$PROFILE_PATH"
+mkdir -p "$LOG_FOLDER"
+mkdir -p "$PROFILE_PATH"

 cd "$BASE/vllm"

@@ -114,7 +114,7 @@ start_server() {

    # wait for 10 minutes...
    server_started=0
-    for i in {1..60}; do
+    for _ in {1..60}; do
        # This line checks whether the server is still alive or not,
        # since that we should always have permission to send signal to the server process.
        kill -0 $server_pid 2> /dev/null || break
@@ -145,12 +145,12 @@ run_benchmark() {
    local vllm_log="$LOG_FOLDER/vllm_log_${max_num_seqs}_${max_num_batched_tokens}.txt"
    echo "vllm_log: $vllm_log"
    echo
-    rm -f $vllm_log
+    rm -f "$vllm_log"
    pkill -if "vllm serve" || true

    echo "starting server..."
    # Call start_server without a profile_dir to avoid profiling overhead
-    start_server $gpu_memory_utilization $max_num_seqs $max_num_batched_tokens $vllm_log ""
+    start_server "$gpu_memory_utilization" "$max_num_seqs" "$max_num_batched_tokens" "$vllm_log" ""
    result=$?
    if [[ "$result" -eq 1 ]]; then
        echo "server failed to start. gpu_memory_utilization:$gpu_memory_utilization, max_num_seqs:$max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens"
@@ -168,15 +168,15 @@ run_benchmark() {
    # --profile flag is removed from this call
    vllm bench serve \
        --backend vllm \
-        --model $MODEL  \
+        --model "$MODEL"  \
        --dataset-name random \
        --random-input-len $adjusted_input_len \
-        --random-output-len $OUTPUT_LEN \
+        --random-output-len "$OUTPUT_LEN" \
        --ignore-eos \
        --disable-tqdm \
        --request-rate inf \
        --percentile-metrics ttft,tpot,itl,e2el \
-        --goodput e2el:$MAX_LATENCY_ALLOWED_MS \
+        --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \
        --num-prompts 1000 \
        --random-prefix-len $prefix_len \
        --host "$HOSTNAME" \
@@ -195,20 +195,20 @@ run_benchmark() {
        request_rate=$((${throughput%.*} + 1))
        while ((request_rate > 0)); do
            # clear prefix cache
-            curl -X POST http://${HOSTNAME}:8004/reset_prefix_cache
+            curl -X POST http://"${HOSTNAME}":8004/reset_prefix_cache
            sleep 5
            bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt"
            vllm bench serve \
                --backend vllm \
-                --model $MODEL  \
+                --model "$MODEL"  \
                --dataset-name random \
                --random-input-len $adjusted_input_len \
-                --random-output-len $OUTPUT_LEN \
+                --random-output-len "$OUTPUT_LEN" \
                --ignore-eos \
                --disable-tqdm \
                --request-rate $request_rate \
                --percentile-metrics ttft,tpot,itl,e2el \
-                --goodput e2el:$MAX_LATENCY_ALLOWED_MS \
+                --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \
                --num-prompts 100 \
                --random-prefix-len $prefix_len \
                --host "$HOSTNAME" \
@@ -255,7 +255,7 @@ gpu_memory_utilization=0.98
 find_gpu_memory_utilization=0
 while (( $(echo "$gpu_memory_utilization >= 0.9" | bc -l) )); do
    # Pass empty string for profile_dir argument
-    start_server $gpu_memory_utilization "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" ""
+    start_server "$gpu_memory_utilization" "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" ""
    result=$?
    if [[ "$result" -eq 0 ]]; then
        find_gpu_memory_utilization=1
@@ -274,7 +274,7 @@ fi

 for num_seqs in "${num_seqs_list[@]}"; do
    for num_batched_tokens in "${num_batched_tokens_list[@]}"; do
-        run_benchmark $num_seqs $num_batched_tokens $gpu_memory_utilization
+        run_benchmark "$num_seqs" "$num_batched_tokens" "$gpu_memory_utilization"
    done
 done
 echo "finish permutations"
@@ -285,7 +285,7 @@ echo "finish permutations"
 if (( $(echo "$best_throughput > 0" | bc -l) )); then
    echo
    echo "Benchmark tuning finished. Now running profiling on the best configuration found..."
-    echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput"
+    echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput, goodput: $best_goodput"
    echo

    vllm_log="$LOG_FOLDER/vllm_log_BEST_PROFILE.txt"
@@ -293,7 +293,7 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then

    # Start server with the best params and profiling ENABLED
    echo "Starting server for profiling..."
-    start_server $gpu_memory_utilization $best_max_num_seqs $best_num_batched_tokens "$vllm_log" "$PROFILE_PATH"
+    start_server "$gpu_memory_utilization" "$best_max_num_seqs" "$best_num_batched_tokens" "$vllm_log" "$PROFILE_PATH"

    # Run benchmark with the best params and the --profile flag
    echo "Running benchmark with profiling..."
@@ -301,15 +301,15 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then
    adjusted_input_len=$(( INPUT_LEN - prefix_len ))
    vllm bench serve \
        --backend vllm \
-        --model $MODEL \
+        --model "$MODEL" \
        --dataset-name random \
        --random-input-len $adjusted_input_len \
-        --random-output-len $OUTPUT_LEN \
+        --random-output-len "$OUTPUT_LEN" \
        --ignore-eos \
        --disable-tqdm \
-        --request-rate $best_request_rate \
+        --request-rate "$best_request_rate" \
        --percentile-metrics ttft,tpot,itl,e2el \
-        --goodput e2el:$MAX_LATENCY_ALLOWED_MS \
+        --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \
        --num-prompts 100 \
        --random-prefix-len $prefix_len \
        --host "$HOSTNAME" \

--- a/benchmarks/auto_tune/batch_auto_tune.sh
+++ b/benchmarks/auto_tune/batch_auto_tune.sh
@@ -64,7 +64,7 @@ for i in $(seq 0 $(($num_runs - 1))); do
  else
    STATUS="FAILURE"
    ((FAILURE_COUNT++))
-    FAILED_RUNS+=("Run #$((i+1)): $(echo $run_object | jq -c .)")
+    FAILED_RUNS+=("Run #$((i+1)): $(echo "$run_object" | jq -c .)")
  fi

  RUN_OUTPUT=$(<"$RUN_OUTPUT_FILE")

--- a/benchmarks/run_structured_output_benchmark.sh
+++ b/benchmarks/run_structured_output_benchmark.sh
@@ -71,7 +71,7 @@ while [[ $# -gt 0 ]]; do
      usage
      ;;
    *)
-      echo "Unknown argument: $1\n"
+      printf "Unknown argument: %s\n" "$1"
      usage
      ;;
  esac
@@ -84,15 +84,17 @@ mkdir -p "$OUTPUT_DIR"
 QPS_VALUES=(25 20 15 10 5 1)

 # Common parameters
-COMMON_PARAMS="--backend $BACKEND \
-               --model $MODEL \
-               --dataset $DATASET \
-               --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \
-               --save-results \
-               --result-dir $OUTPUT_DIR \
-               --output-len $MAX_NEW_TOKENS \
-               --port $PORT \
-               --tokenizer-mode $TOKENIZER_MODE"
+COMMON_PARAMS=(
+  --backend "$BACKEND"
+  --model "$MODEL"
+  --dataset "$DATASET"
+  --structured-output-ratio "$STRUCTURED_OUTPUT_RATIO"
+  --save-results
+  --result-dir "$OUTPUT_DIR"
+  --output-len "$MAX_NEW_TOKENS"
+  --port "$PORT"
+  --tokenizer-mode "$TOKENIZER_MODE"
+)

 echo "Starting structured output benchmark with model: $MODEL"
 echo "Backend: $BACKEND"
@@ -109,17 +111,17 @@ for qps in "${QPS_VALUES[@]}"; do
  GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown")

  # Construct filename for this run
-  FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
+  FILENAME="${BACKEND}_${qps}qps_$(basename "$MODEL")_${DATASET}_${GIT_HASH}_${GIT_BRANCH}.json"

  NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc)
  NUM_PROMPTS=${NUM_PROMPTS%.*}  # Remove fractional part
  echo "Running benchmark with $NUM_PROMPTS prompts"

  # Run the benchmark
-  python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
-    --request-rate $qps \
+  python "$SCRIPT_DIR/benchmark_serving_structured_output.py" "${COMMON_PARAMS[@]}" \
+    --request-rate "$qps" \
    --result-filename "$FILENAME" \
-    --num-prompts $NUM_PROMPTS
+    --num-prompts "$NUM_PROMPTS"

  echo "Completed benchmark with QPS: $qps"
  echo "----------------------------------------"

--- a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
+++ b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
@@ -8,7 +8,7 @@ declare -a PIDS=()
 ###############################################################################
 MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
 LOG_PATH="${LOG_PATH:-./logs}"
-mkdir -p $LOG_PATH
+mkdir -p "$LOG_PATH"

 ENCODE_PORT="${ENCODE_PORT:-19534}"
 PREFILL_PORT="${PREFILL_PORT:-19535}"
@@ -84,10 +84,10 @@ trap cleanup TERM

 # clear previous cache
 echo "remove previous ec cache folder"
-rm -rf $EC_SHARED_STORAGE_PATH
+rm -rf "$EC_SHARED_STORAGE_PATH"

 echo "make ec cache folder"
-mkdir -p $EC_SHARED_STORAGE_PATH
+mkdir -p "$EC_SHARED_STORAGE_PATH"

 ###############################################################################
 # Encoder worker
@@ -100,7 +100,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
    --no-enable-prefix-caching \
    --max-num-batched-tokens 114688 \
    --max-num-seqs 128 \
-    --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
+    --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
    --ec-transfer-config '{
        "ec_connector": "ECExampleConnector",
        "ec_role": "ec_producer",
@@ -124,7 +124,7 @@ vllm serve "$MODEL" \
    --enforce-eager \
    --enable-request-id-headers \
    --max-num-seqs 128 \
-    --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
+    --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
    --ec-transfer-config '{
        "ec_connector": "ECExampleConnector",
        "ec_role": "ec_consumer",
@@ -152,7 +152,7 @@ vllm serve "$MODEL" \
    --enforce-eager \
    --enable-request-id-headers \
    --max-num-seqs 128 \
-    --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
+    --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
    --kv-transfer-config '{
        "kv_connector": "NixlConnector",
        "kv_role": "kv_consumer"
@@ -162,9 +162,9 @@ vllm serve "$MODEL" \
 PIDS+=($!)

 # Wait for workers
-wait_for_server $ENCODE_PORT
-wait_for_server $PREFILL_PORT
-wait_for_server $DECODE_PORT
+wait_for_server "$ENCODE_PORT"
+wait_for_server "$PREFILL_PORT"
+wait_for_server "$DECODE_PORT"

 ###############################################################################
 # Proxy
@@ -179,7 +179,7 @@ python disagg_epd_proxy.py \

 PIDS+=($!)

-wait_for_server $PROXY_PORT
+wait_for_server "$PROXY_PORT"
 echo "All services are up!"

 ###############################################################################
@@ -187,14 +187,14 @@ echo "All services are up!"
 ###############################################################################
 echo "Running benchmark (stream)..."
 vllm bench serve \
-  --model               $MODEL \
+  --model               "$MODEL" \
  --backend             openai-chat \
  --endpoint            /v1/chat/completions \
  --dataset-name        hf \
  --dataset-path        lmarena-ai/VisionArena-Chat \
  --seed                0 \
-  --num-prompts         $NUM_PROMPTS \
-  --port                $PROXY_PORT
+  --num-prompts         "$NUM_PROMPTS" \
+  --port                "$PROXY_PORT"

 PIDS+=($!)

@@ -202,10 +202,10 @@ PIDS+=($!)
 # Single request with local image
 ###############################################################################
 echo "Running single request with local image (non-stream)..."
-curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
+curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
-    "model": "'${MODEL}'",
+    "model": "'"${MODEL}"'",
    "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [

--- a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
+++ b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
@@ -8,7 +8,7 @@ declare -a PIDS=()
 ###############################################################################
 MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
 LOG_PATH="${LOG_PATH:-./logs}"
-mkdir -p $LOG_PATH
+mkdir -p "$LOG_PATH"

 ENCODE_PORT="${ENCODE_PORT:-19534}"
 PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}"
@@ -78,10 +78,10 @@ trap cleanup TERM

 # clear previous cache
 echo "remove previous ec cache folder"
-rm -rf $EC_SHARED_STORAGE_PATH
+rm -rf "$EC_SHARED_STORAGE_PATH"

 echo "make ec cache folder"
-mkdir -p $EC_SHARED_STORAGE_PATH
+mkdir -p "$EC_SHARED_STORAGE_PATH"

 ###############################################################################
 # Encoder worker
@@ -94,7 +94,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
    --no-enable-prefix-caching \
    --max-num-batched-tokens 114688 \
    --max-num-seqs 128 \
-    --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
+    --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
    --ec-transfer-config '{
        "ec_connector": "ECExampleConnector",
        "ec_role": "ec_producer",
@@ -115,7 +115,7 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
    --enforce-eager \
    --enable-request-id-headers \
    --max-num-seqs 128 \
-    --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
+    --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
    --ec-transfer-config '{
        "ec_connector": "ECExampleConnector",
        "ec_role": "ec_consumer",
@@ -128,8 +128,8 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
 PIDS+=($!)

 # Wait for workers
-wait_for_server $ENCODE_PORT
-wait_for_server $PREFILL_DECODE_PORT
+wait_for_server "$ENCODE_PORT"
+wait_for_server "$PREFILL_DECODE_PORT"

 ###############################################################################
 # Proxy
@@ -144,7 +144,7 @@ python disagg_epd_proxy.py \

 PIDS+=($!)

-wait_for_server $PROXY_PORT
+wait_for_server "$PROXY_PORT"
 echo "All services are up!"

 ###############################################################################
@@ -152,14 +152,14 @@ echo "All services are up!"
 ###############################################################################
 echo "Running benchmark (stream)..."
 vllm bench serve \
-  --model               $MODEL \
+  --model               "$MODEL" \
  --backend             openai-chat \
  --endpoint            /v1/chat/completions \
  --dataset-name        hf \
  --dataset-path        lmarena-ai/VisionArena-Chat \
  --seed                0 \
-  --num-prompts         $NUM_PROMPTS \
-  --port                $PROXY_PORT
+  --num-prompts         "$NUM_PROMPTS" \
+  --port                "$PROXY_PORT"

 PIDS+=($!)

@@ -167,10 +167,10 @@ PIDS+=($!)
 # Single request with local image
 ###############################################################################
 echo "Running single request with local image (non-stream)..."
-curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
+curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
-    "model": "'${MODEL}'",
+    "model": "'"${MODEL}"'",
    "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [

--- a/examples/online_serving/disaggregated_prefill.sh
+++ b/examples/online_serving/disaggregated_prefill.sh
@@ -54,7 +54,7 @@ wait_for_server() {
 # You can also adjust --kv-ip and --kv-port for distributed inference.

 # prefilling instance, which is the KV producer
-CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \
+CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL_NAME" \
    --host 0.0.0.0 \
    --port 8100 \
    --max-model-len 100 \
@@ -64,7 +64,7 @@ CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \
    '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":"1e9","kv_port":"14579","kv_connector_extra_config":{"proxy_ip":"'"$VLLM_HOST_IP"'","proxy_port":"30001","http_ip":"'"$VLLM_HOST_IP"'","http_port":"8100","send_type":"PUT_ASYNC"}}' &

 # decoding instance, which is the KV consumer  
-CUDA_VISIBLE_DEVICES=1 vllm serve $MODEL_NAME \
+CUDA_VISIBLE_DEVICES=1 vllm serve "$MODEL_NAME" \
    --host 0.0.0.0 \
    --port 8200 \
    --max-model-len 100 \

--- a/examples/online_serving/disaggregated_serving/kv_events.sh
+++ b/examples/online_serving/disaggregated_serving/kv_events.sh
@@ -34,7 +34,7 @@ wait_for_server() {
    done" && return 0 || return 1
 }

-vllm serve $MODEL_NAME \
+vllm serve "$MODEL_NAME" \
    --port 8100 \
    --max-model-len 100 \
    --enforce-eager \

--- a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh
+++ b/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh
@@ -143,7 +143,7 @@ main() {
    IFS=',' read -ra BOOTSTRAP_PORT_ARRAY <<< "$BOOTSTRAP_PORTS"
    IFS=',' read -ra DECODE_PORT_ARRAY <<< "$DECODE_PORTS"

-    proxy_param=""
+    proxy_args=()

    # =============================================================================
    # Launch Prefill Servers (X Producers)
@@ -156,12 +156,12 @@ main() {
        local bootstrap_port=${BOOTSTRAP_PORT_ARRAY[$i]}

        echo "  Prefill server $((i+1)): GPU $gpu_id, Port $port, Bootstrap Port $bootstrap_port"
-        VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
-        --port $port \
+        VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \
+        --port "$port" \
        --kv-transfer-config \
        "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_producer\"}" > prefill$((i+1)).log 2>&1 &
        PIDS+=($!)
-        proxy_param="${proxy_param} --prefill http://0.0.0.0:${port} $bootstrap_port"
+        proxy_args+=(--prefill "http://0.0.0.0:${port}" "$bootstrap_port")
    done

    # =============================================================================
@@ -174,12 +174,12 @@ main() {
        local port=${DECODE_PORT_ARRAY[$i]}

        echo "  Decode server $((i+1)): GPU $gpu_id, Port $port"
-        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
-        --port $port \
+        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \
+        --port "$port" \
        --kv-transfer-config \
        "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_consumer\"}" > decode$((i+1)).log 2>&1 &
        PIDS+=($!)
-        proxy_param="${proxy_param} --decode http://0.0.0.0:${port}"
+        proxy_args+=(--decode "http://0.0.0.0:${port}")
    done

    # =============================================================================
@@ -187,7 +187,7 @@ main() {
    # =============================================================================
    echo ""
    echo "Starting proxy server on port $PROXY_PORT..."
-    python3 mooncake_connector_proxy.py $proxy_param --port $PROXY_PORT > proxy.log 2>&1 &
+    python3 mooncake_connector_proxy.py "${proxy_args[@]}" --port "$PROXY_PORT" > proxy.log 2>&1 &
    PIDS+=($!)

    # =============================================================================
@@ -196,9 +196,10 @@ main() {
    echo ""
    echo "Waiting for all servers to start..."
    for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do
-        if ! wait_for_server $port; then
+        if ! wait_for_server "$port"; then
            echo "Failed to start server on port $port"
            cleanup
+            # shellcheck disable=SC2317
            exit 1
        fi
    done
@@ -209,8 +210,8 @@ main() {
    # =============================================================================
    # Run Benchmark
    # =============================================================================
-    vllm bench serve --port $PROXY_PORT --seed $(date +%s) \
-        --backend vllm --model $MODEL \
+    vllm bench serve --port "$PROXY_PORT" --seed "$(date +%s)" \
+        --backend vllm --model "$MODEL" \
        --dataset-name random --random-input-len 7500 --random-output-len 200 \
        --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log


--- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
+++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
@@ -166,10 +166,10 @@ main() {
        local kv_port=$((21001 + i))

        echo "  Prefill server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port"
-        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
+        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \
        --enforce-eager \
        --host 0.0.0.0 \
-        --port $port \
+        --port "$port" \
        --tensor-parallel-size 1 \
        --seed 1024 \
        --dtype float16 \
@@ -194,10 +194,10 @@ main() {
        local kv_port=$((22001 + i))

        echo "  Decode server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port"
-        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \
+        CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \
        --enforce-eager \
        --host 0.0.0.0 \
-        --port $port \
+        --port "$port" \
        --tensor-parallel-size 1 \
        --seed 1024 \
        --dtype float16 \
@@ -217,9 +217,10 @@ main() {
    echo ""
    echo "Waiting for all servers to start..."
    for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do
-        if ! wait_for_server $port; then
+        if ! wait_for_server "$port"; then
            echo "Failed to start server on port $port"
            cleanup
+            # shellcheck disable=SC2317
            exit 1
        fi
    done
@@ -231,8 +232,8 @@ main() {
    # Run Benchmark
    # =============================================================================
    cd ../../../benchmarks/
-    vllm bench serve --port 10001 --seed $(date +%s) \
-        --model $MODEL \
+    vllm bench serve --port 10001 --seed "$(date +%s)" \
+        --model "$MODEL" \
        --dataset-name random --random-input-len 7500 --random-output-len 200 \
        --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log


--- a/examples/online_serving/elastic_ep/bench.sh
+++ b/examples/online_serving/elastic_ep/bench.sh
@@ -50,8 +50,8 @@ while [[ $# -gt 0 ]]; do
 done

 vllm bench serve \
-    --model $MODEL_NAME \
-    --host $HOST \
-    --port $PORT \
-    --num-prompts $NUM_PROMPTS \
-    --request-rate $REQUEST_RATE
+    --model "$MODEL_NAME" \
+    --host "$HOST" \
+    --port "$PORT" \
+    --num-prompts "$NUM_PROMPTS" \
+    --request-rate "$REQUEST_RATE"
--- a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh
+++ b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh
@@ -57,15 +57,15 @@ echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALL
 export RAY_DEDUP_LOGS=0
 export VLLM_USE_DEEP_GEMM=1

-vllm serve $MODEL_NAME \
-    --data-parallel-size $DATA_PARALLEL_SIZE \
-    --data-parallel-size-local $DATA_PARALLEL_SIZE \
+vllm serve "$MODEL_NAME" \
+    --data-parallel-size "$DATA_PARALLEL_SIZE" \
+    --data-parallel-size-local "$DATA_PARALLEL_SIZE" \
    --data-parallel-backend ray \
    --enforce-eager \
    --enable-expert-parallel \
    --enable-eplb \
    --all2all-backend pplx \
-    --num-redundant-experts $REDUNDANT_EXPERTS \
+    --num-redundant-experts "$REDUNDANT_EXPERTS" \
    --trust-remote-code \
-    --host $HOST \
-    --port $PORT
+    --host "$HOST" \
+    --port "$PORT"
--- a/examples/online_serving/multi-node-serving.sh
+++ b/examples/online_serving/multi-node-serving.sh
@@ -57,8 +57,7 @@ case "$subcommand" in

    # Retry until the worker node connects to the head node or the timeout expires.
    for (( i=0; i < $ray_init_timeout; i+=5 )); do
-      ray start --address=$ray_address:$ray_port --block "${start_params[@]}"
-      if [ $? -eq 0 ]; then
+      if ray start --address="$ray_address":"$ray_port" --block "${start_params[@]}"; then
        echo "Worker: Ray runtime started with head address $ray_address:$ray_port"
        exit 0
      fi
@@ -95,12 +94,12 @@ case "$subcommand" in
    fi

    # Start the Ray head node.
-    ray start --head --port=$ray_port "${start_params[@]}"
+    ray start --head --port="$ray_port" "${start_params[@]}"

    # Poll Ray until every worker node is active.
    for (( i=0; i < $ray_init_timeout; i+=5 )); do
-        active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'`
-        if [ $active_nodes -eq $ray_cluster_size ]; then
+        active_nodes=$(python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))')
+        if [ "$active_nodes" -eq "$ray_cluster_size" ]; then
          echo "All ray workers are active and the ray cluster is initialized successfully."
          exit 0
        fi

--- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
+++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
@@ -22,11 +22,10 @@ check_hf_token() {

 check_num_gpus() {
    # can you check if the number of GPUs are >=2 via nvidia-smi/rocm-smi?
-    which rocm-smi > /dev/null 2>&1
-    if [ $? -ne 0 ]; then
+    if ! which rocm-smi > /dev/null 2>&1; then
 	num_gpus=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
    else
-	num_gpus=$(rocm-smi --showid | grep Instinct | wc -l)
+	num_gpus=$(rocm-smi --showid | grep -c Instinct)
    fi

    if [ "$num_gpus" -lt 2 ]; then
@@ -39,8 +38,7 @@ check_num_gpus() {

 ensure_python_library_installed() {
    echo "Checking if $1 is installed..."
-    python3 -c "import $1" > /dev/null 2>&1
-    if [ $? -ne 0 ]; then
+    if ! python3 -c "import $1" > /dev/null 2>&1; then
        if [ "$1" == "nixl" ]; then
            echo "$1 is not installed. Please refer to https://github.com/ai-dynamo/nixl for installation."
        else
@@ -102,12 +100,12 @@ main() {
    bash disagg_vllm_launcher.sh prefiller \
        > >(tee prefiller.log) 2>&1 &
    prefiller_pid=$!
-    PIDS+=($prefiller_pid)
+    PIDS+=("$prefiller_pid")

    bash disagg_vllm_launcher.sh decoder  \
        > >(tee decoder.log)  2>&1 &
    decoder_pid=$!
-    PIDS+=($decoder_pid)
+    PIDS+=("$decoder_pid")

    python3 disagg_proxy_server.py \
        --host localhost \
@@ -118,7 +116,7 @@ main() {
        --decoder-port 8200  \
        > >(tee proxy.log)    2>&1 &
    proxy_pid=$!
-    PIDS+=($proxy_pid)
+    PIDS+=("$proxy_pid")

    wait_for_server 8100
    wait_for_server 8200
@@ -128,7 +126,7 @@ main() {

    # begin benchmark
    cd ../../../../benchmarks/
-    vllm bench serve --port 9000 --seed $(date +%s) \
+    vllm bench serve --port 9000 --seed "$(date +%s)" \
        --model meta-llama/Llama-3.1-8B-Instruct \
        --dataset-name random --random-input-len 7500 --random-output-len 200 \
        --num-prompts 200 --burstiness 100 --request-rate 3.6 | tee benchmark.log

--- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
+++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
@@ -34,7 +34,7 @@ if [[ $1 == "prefiller" ]]; then
        VLLM_ENABLE_V1_MULTIPROCESSING=1 \
        VLLM_WORKER_MULTIPROC_METHOD=spawn \
        CUDA_VISIBLE_DEVICES=0 \
-        vllm serve $MODEL \
+        vllm serve "$MODEL" \
        --port 8100 \
        --enforce-eager \
        --kv-transfer-config \
@@ -51,7 +51,7 @@ elif [[ $1 == "decoder" ]]; then
        VLLM_ENABLE_V1_MULTIPROCESSING=1 \
        VLLM_WORKER_MULTIPROC_METHOD=spawn \
        CUDA_VISIBLE_DEVICES=1 \
-        vllm serve $MODEL \
+        vllm serve "$MODEL" \
        --port 8200 \
        --enforce-eager \
        --kv-transfer-config \

--- a/examples/pooling/embed/openai_embedding_long_text/service.sh
+++ b/examples/pooling/embed/openai_embedding_long_text/service.sh
@@ -103,7 +103,7 @@ vllm serve "$MODEL_NAME" \
  --tensor-parallel-size "$GPU_COUNT" \
  --enforce-eager \
  --pooler-config "$POOLER_CONFIG" \
-  --served-model-name ${MODEL_CODE} \
+  --served-model-name "${MODEL_CODE}" \
  --api-key "$API_KEY" \
  --trust-remote-code \
  --port "$PORT" \