chore: Add SERVED_MODEL_NAME for consistent model name regardless of MODEL_PATH (#1632)

2becce56 · Ryan McCormick · GitHub · 57f5725d · 2becce56 · 2becce56
Unverified Commit 2becce56 authored Jun 25, 2025 by Ryan McCormick Committed by GitHub Jun 24, 2025
2 changed files
--- a/examples/tensorrt_llm/configs/deepseek_r1/multinode/README.md
+++ b/examples/tensorrt_llm/configs/deepseek_r1/multinode/README.md
@@ -99,6 +99,13 @@ export MOUNTS="${PWD}:/mnt"
 # https://huggingface.co/deepseek-ai/DeepSeek-R1
 export MODEL_PATH="nvidia/DeepSeek-R1-FP4"

+# The name the model will be served/queried under, matching what's
+# returned by the /v1/models endpoint.
+#
+# By default this is inferred from MODEL_PATH. When MODEL_PATH points to locally
+# downloaded model weights, set it explicitly so the served name stays the same.
+export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
+
 # NOTE: This path assumes you have mounted the config file into /mnt inside
 # the container. See the MOUNTS variable in srun_script.sh
 export ENGINE_CONFIG="/mnt/agg_DEP16_dsr1.yaml"
@@ -148,7 +155,7 @@ export ENGINE_CONFIG="/mnt/agg_DEP16_dsr1.yaml"
 4. After the model fully finishes loading on all ranks, the worker will register itself,
   and the OpenAI frontend will detect it, signaled by this output:
    ```
-    0: 2025-06-13T02:46:35.040Z  INFO dynamo_llm::discovery::watcher: added model model_name="Deepseek-R1-FP4"
+    0: 2025-06-13T02:46:35.040Z  INFO dynamo_llm::discovery::watcher: added model model_name="nvidia/DeepSeek-R1-FP4"
    ```
 5. At this point, with the worker fully initialized and detected by the frontend,
   it is now ready for inference.
@@ -161,11 +168,11 @@ To verify the deployed model is working, send a `curl` request:
 # NOTE: $HOST assumes running on head node, but can be changed to $HEAD_NODE_IP instead.
 HOST=localhost
 PORT=8000
-MODEL=Deepseek-R1-FP4
+# "model" here should match the model name returned by the /v1/models endpoint
 curl -w "%{http_code}" ${HOST}:${PORT}/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
-  "model": "'${MODEL}'",
+  "model": "'${SERVED_MODEL_NAME}'",
  "messages": [
  {
    "role": "user",

--- a/examples/tensorrt_llm/configs/deepseek_r1/multinode/start_trtllm_worker.sh
+++ b/examples/tensorrt_llm/configs/deepseek_r1/multinode/start_trtllm_worker.sh
@@ -10,6 +10,12 @@ if [[ -z ${MODEL_PATH} ]]; then
    exit 1
 fi

+# SERVED_MODEL_NAME is optional: when it is unset or empty, emit a warning and
+# continue. The warning goes to stderr because it is a diagnostic, and the
+# ':-' default keeps the test from aborting if the script runs under 'set -u'.
+if [[ -z "${SERVED_MODEL_NAME:-}" ]]; then
+    echo "WARNING: SERVED_MODEL_NAME was not set. It will be derived from MODEL_PATH." >&2
+fi
+
+
+
 if [[ -z ${ENGINE_CONFIG} ]]; then
    echo "ERROR: ENGINE_CONFIG was not set."
    echo "ERROR: ENGINE_CONFIG must be set to a valid Dynamo+TRTLLM engine config file."
@@ -23,4 +29,5 @@ fi
 trtllm-llmapi-launch \
  python3 /workspace/launch/dynamo-run/src/subprocess/trtllm_inc.py \
    --model-path "${MODEL_PATH}" \
+    --model-name "${SERVED_MODEL_NAME}" \
    --extra-engine-args "${ENGINE_CONFIG}"