grpcurl.sh 1.43 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


# Invoke the mock KServe gRPC endpoint using grpcurl. Requires grpcurl installed.
# The service does not expose server reflection, so we point grpcurl at the proto files directly.

# Strict mode: abort on command errors, on unset variables, and on failures
# anywhere inside a pipeline.
set -euo pipefail

# Resolve the proto directory relative to this script's own location so the
# caller's working directory does not matter.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROTO_DIR="${SCRIPT_DIR}/../../../../llm/src/grpc/protos"

# Connection target and model name. Each value may be overridden from the
# environment (e.g. `HOST=10.0.0.5 PORT=9000 MODEL=my_model <script>`);
# otherwise the defaults below are used.
HOST="${HOST:-127.0.0.1}"
PORT="${PORT:-8787}"
MODEL="${MODEL:-mock_model}"

# Prompts are taken from the command-line arguments, one prompt per argument.
# With no arguments, a small built-in sample set is sent instead.
if [[ $# -gt 0 ]]; then
  PROMPTS=("$@")
else
  PROMPTS=(
    "Hello from Dynamo!"
    "How are you today?"
    "Tell me a joke."
  )
fi

#######################################
# Base64-encode a string and print the result on a single line.
# Arguments: $1 - text to encode
# Outputs:   the base64 text followed by one newline on stdout;
#            an error message on stderr if no encoder is available
# Returns:   0 on success, non-zero if encoding fails or no encoder exists
#######################################
encode_base64() {
  local text="$1"
  local py

  # Prefer the base64 utility so the script does not depend on a bare
  # `python` executable being on PATH.
  if command -v base64 >/dev/null 2>&1; then
    # Some base64 implementations wrap long output across lines; strip the
    # newlines so the value can be embedded in a JSON string as one token.
    printf '%s' "$text" | base64 | tr -d '\n' || return
    printf '\n'
    return
  fi

  # Fall back to whichever Python interpreter is installed.
  for py in python3 python; do
    if command -v "$py" >/dev/null 2>&1; then
      "$py" - "$text" <<'PY'
import base64
import sys

print(base64.b64encode(sys.argv[1].encode("utf-8")).decode("ascii"))
PY
      return
    fi
  done

  printf 'encode_base64: need base64, python3, or python on PATH\n' >&2
  return 1
}

#######################################
# Send one prompt to the ModelInfer RPC through grpcurl.
# Globals:   MODEL, HOST, PORT, PROTO_DIR (read)
# Arguments: $1 - prompt text; it is base64-encoded before being sent
# Outputs:   a "Sending prompt" banner, then whatever grpcurl prints
# Returns:   the exit status of grpcurl
#######################################
run_infer() {
  local prompt="$1"
  local encoded payload
  local -a call_args

  # Encode before printing anything, so a failed encode produces no banner.
  encoded="$(encode_base64 "$prompt")"

  printf -- '---\n'
  printf 'Sending prompt: %s\n' "$prompt"

  # JSON request body: a single BYTES tensor named text_input holding the
  # base64-encoded prompt.
  printf -v payload '{
      "model_name": "%s",
      "inputs": [
        {
          "name": "text_input",
          "datatype": "BYTES",
          "shape": [1],
          "contents": { "bytesContents": ["%s"] }
        }
      ]
    }' "${MODEL}" "${encoded}"

  # The proto file is passed explicitly via -import-path / -proto.
  call_args=(
    -plaintext
    -import-path "${PROTO_DIR}"
    -proto kserve.proto
    -d "${payload}"
    "${HOST}:${PORT}"
    inference.GRPCInferenceService/ModelInfer
  )

  grpcurl "${call_args[@]}"
}

# Send each prompt in order, one run_infer call per prompt.
for ((idx = 0; idx < ${#PROMPTS[@]}; idx++)); do
  run_infer "${PROMPTS[idx]}"
done