fix: Handle invalid JSON in config.json (#3043)

Signed-off-by: Graham King <grahamk@nvidia.com>

fix: Handle invalid JSON in config.json (#3043)
Signed-off-by: Graham King <grahamk@nvidia.com>
b1186aee · Graham King · GitHub · 7af49a15 · b1186aee · b1186aee
Unverified Commit b1186aee authored Sep 15, 2025 by Graham King Committed by GitHub Sep 15, 2025
7 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2017,6 +2017,7 @@ dependencies = [
 "humantime",
 "insta",
 "itertools 0.14.0",
+ "json-five",
 "lazy_static",
 "memmap2",
 "minijinja",
@@ -3933,6 +3934,16 @@ dependencies = [
 "wasm-bindgen",
 ]
+[[package]]
+name = "json-five"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56bf719068ddd382e66ee32cf044805aa8203bf9999b5af007bd0367fb681c4d"
+dependencies = [
+ "serde",
+ "unicode-general-category",
+]
 [[package]]
 name = "jsonschema"
 version = "0.17.1"
@@ -8725,6 +8736,12 @@ version = "2.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
+[[package]]
+name = "unicode-general-category"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24adfe8311434967077a6adff125729161e6e4934d76f6b7c55318ac5c9246d3"
 [[package]]
 name = "unicode-ident"
 version = "1.0.18"

--- a/lib/llm/Cargo.toml
+++ b/lib/llm/Cargo.toml
@@ -147,6 +147,7 @@ erased-serde = { version = "0.4" }
 itertools = { version = "0.14.0" }
 minijinja = { version = "2.10.2", features = ["loader"] }
 minijinja-contrib = { version = "2.10.2", features = ["pycompat"] }
+json-five = { version = "0.3" }
 # GGUF
 ggus = "0.4.0"

--- a/lib/llm/src/lib.rs
+++ b/lib/llm/src/lib.rs
@@ -99,6 +99,56 @@ pub fn file_json_field<T: serde::de::DeserializeOwned>(
    })
 }
+/// Log a JSON parse error together with the lines surrounding the failure.
+///
+/// The logged context holds up to two lines before the failing line, the failing line
+/// itself (prefixed with `>>`), a `^` marker under the failing column, and up to two
+/// lines after it.
+///
+/// * `filename` - Name included in the log message; used for display only.
+/// * `json` - The text that failed to parse.
+/// * `err` - The error `serde_json` produced for `json`.
+///
+/// Errors that are neither syntax nor data errors are ignored. Nothing is returned;
+/// the only effect is a `tracing::error!` event.
+pub fn log_json_err(filename: &str, json: &str, err: &serde_json::Error) {
+    const ERROR_PREFIX: &str = ">>     ";
+    // Number of lines shown on each side of the failing line
+    const CONTEXT_LINES: usize = 2;
+    // Only log errors that relate to the content of the JSON file
+    if !(err.is_syntax() || err.is_data()) {
+        return;
+    }
+    // These are 1 based for humans so subtract. Saturating, so a reported position of 0
+    // cannot underflow.
+    let line = err.line().saturating_sub(1);
+    let column = err.column().saturating_sub(1);
+    let json_lines: Vec<&str> = json.lines().collect();
+    if json_lines.is_empty() {
+        tracing::error!("JSON parsing error in {filename}: File is empty.");
+        return;
+    }
+    // Two lines before. `line` is unsigned, so a plain `line - 2` would underflow when the
+    // error is on the first or second line (panic in debug builds, no context in release).
+    let start_index = line.saturating_sub(CONTEXT_LINES);
+    // The problem line and two lines after, clamped to the end of the input
+    let end_index = line
+        .saturating_add(CONTEXT_LINES + 1)
+        .min(json_lines.len());
+    // Collect the context. The range is empty if the reported line is past the last line.
+    let mut context_lines: Vec<String> = (start_index..end_index)
+        .map(|i| {
+            if i == line {
+                format!("{ERROR_PREFIX}{}", json_lines[i])
+            } else {
+                // Six places because tokenizer.json is very long
+                format!("{:06} {}", i + 1, json_lines[i])
+            }
+        })
+        .collect();
+    // Insert the column indicator directly below the problem line
+    let col_indicator = "_".repeat(column + ERROR_PREFIX.len()) + "^";
+    // Cannot underflow: start_index is never greater than line
+    let error_in_context_idx = line - start_index;
+    if error_in_context_idx < context_lines.len() {
+        context_lines.insert(error_in_context_idx + 1, col_indicator);
+    }
+    tracing::error!(
+        "JSON parsing error in {filename}: Line {}, column {}:\n{}",
+        err.line(),
+        err.column(),
+        context_lines.join("\n")
+    );
+}
 #[cfg(test)]
 mod file_json_field_tests {
    use super::file_json_field;

--- a/lib/llm/src/model_card.rs
+++ b/lib/llm/src/model_card.rs
@@ -174,12 +174,16 @@ impl ModelDeploymentCard {
    /// Load a model deployment card from a JSON file
    pub fn load_from_json_file<P: AsRef<Path>>(file: P) -> std::io::Result<Self> {
-        Ok(serde_json::from_str(&std::fs::read_to_string(file)?)?)
+        let contents = std::fs::read_to_string(&file)?;
+        Ok(serde_json::from_str(&contents).inspect_err(|err| {
+            crate::log_json_err(&file.as_ref().display().to_string(), &contents, err)
+        })?)
    }
    /// Load a model deployment card from a JSON string
-    pub fn load_from_json_str(json: &str) -> Result<Self, anyhow::Error> {
+    pub fn load_from_json_str(contents: &str) -> Result<Self, anyhow::Error> {
-        Ok(serde_json::from_str(json)?)
+        Ok(serde_json::from_str(contents)
+            .inspect_err(|err| crate::log_json_err("unknown", contents, err))?)
    }
    //
@@ -227,7 +231,15 @@ impl ModelDeploymentCard {
                let p = checked_file.path().ok_or_else(||
                    anyhow::anyhow!("Tokenizer is URL-backed ({:?}); call move_from_nats() before tokenizer_hf()", checked_file.url())
                )?;
-                HfTokenizer::from_file(p).map_err(anyhow::Error::msg)
+                HfTokenizer::from_file(p)
+                    .inspect_err(|err| {
+                        if let Some(serde_err) = err.downcast_ref::<serde_json::Error>()
+                            && let Ok(contents) = std::fs::read_to_string(p)
+                        {
+                            crate::log_json_err(&p.display().to_string(), &contents, serde_err);
+                        }
+                    })
+                    .map_err(anyhow::Error::msg)
            }
            Some(TokenizerKind::GGUF(t)) => Ok(*t.clone()),
            None => {
@@ -627,11 +639,18 @@ impl HFConfig {
    fn from_json_file<P: AsRef<Path>>(file: P) -> Result<Arc<dyn ModelInfo>> {
        let file_path = file.as_ref();
        let contents = std::fs::read_to_string(file_path)?;
-        let mut config: Self = serde_json::from_str(&contents)?;
+        let mut config: Self = json_five::from_str(&contents)
+            .inspect_err(|err| {
+                tracing::error!(path=%file_path.display(), %err, "Failed to parse config.json as JSON5");
+            })?;
        if config.text_config.is_none() {
-            let text_config: HFTextConfig = serde_json::from_str(&contents)?;
+            let text_config: HFTextConfig = json_five::from_str(&contents)
+                .inspect_err(|err| {
+                    tracing::error!(path=%file_path.display(), %err, "Failed to parse text config from config.json as JSON5");
+                })?;
            config.text_config = Some(text_config);
        }
        // Sometimes bos_token_id is in generation_config.json not config.json
        let Some(text_config) = config.text_config.as_mut() else {
            anyhow::bail!(
@@ -882,4 +901,14 @@ mod tests {
        assert_eq!(config.bos_token_id(), 200000);
        Ok(())
    }
+    /// Python's JSON parser treats `Infinity` as a valid number. The JSON spec forbids
+    /// that, but files relying on it exist, so loading them has to work.
+    /// The file is therefore read as JSON5 (a lenient superset of JSON).
+    #[test]
+    fn test_invalid_json_but_py_accepts_it() {
+        dynamo_runtime::logging::init();
+        let config_path =
+            "tests/data/sample-models/NVIDIA-Nemotron-Nano-12B-v2-Base/config.json";
+        let _ = HFConfig::from_json_file(config_path).unwrap();
+    }
 }
--- a/lib/llm/src/preprocessor/prompt/template.rs
+++ b/lib/llm/src/preprocessor/prompt/template.rs
@@ -30,9 +30,12 @@ impl PromptFormatter {
                        mdc.display_name
                    );
                };
-                let content = std::fs::read_to_string(file)
+                let contents = std::fs::read_to_string(file)
                    .with_context(|| format!("fs:read_to_string '{}'", file.display()))?;
-                let mut config: ChatTemplate = serde_json::from_str(&content)?;
+                let mut config: ChatTemplate =
+                    serde_json::from_str(&contents).inspect_err(|err| {
+                        crate::log_json_err(&file.display().to_string(), &contents, err)
+                    })?;
                // Some HF model (i.e. meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8)
                // stores the chat template in a separate file, we check if the file exists and

--- a/lib/llm/src/request_template.rs
+++ b/lib/llm/src/request_template.rs
 // SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
@@ -24,7 +15,8 @@ pub struct RequestTemplate {
 impl RequestTemplate {
    pub fn load(path: &Path) -> Result<Self> {
        let template = std::fs::read_to_string(path)?;
-        let template: Self = serde_json::from_str(&template)?;
+        let template: Self = serde_json::from_str(&template)
+            .inspect_err(|err| crate::log_json_err(&path.display().to_string(), &template, err))?;
        Ok(template)
    }
 }
--- a/lib/llm/tests/data/sample-models/NVIDIA-Nemotron-Nano-12B-v2-Base/config.json
+++ b/lib/llm/tests/data/sample-models/NVIDIA-Nemotron-Nano-12B-v2-Base/config.json
+{
+  "architectures": [
+    "NemotronHForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_nemotron_h.NemotronHConfig",
+    "AutoModelForCausalLM": "modeling_nemotron_h.NemotronHForCausalLM"
+  },
+  "bos_token_id": 1,
+  "chunk_size": 128,
+  "conv_kernel": 4,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_dropout": 0.0,
+  "hidden_size": 5120,
+  "hybrid_override_pattern": "M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M-",
+  "initializer_range": 0.02,
+  "intermediate_size": 20480,
+  "layer_norm_epsilon": 1e-05,
+  "mamba_head_dim": 80,
+  "mamba_hidden_act": "silu",
+  "mamba_num_groups": 8,
+  "mamba_num_heads": 128,
+  "mamba_proj_bias": false,
+  "mamba_state_dim": 128,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "mlp_hidden_act": "relu2",
+  "model_type": "nemotron_h",
+  "n_groups": 8,
+  "num_attention_heads": 40,
+  "num_hidden_layers": 62,
+  "num_key_value_heads": 8,
+  "num_logits_to_keep": 1,
+  "num_query_groups": 8,
+  "pad_token_id": 0,
+  "rescale_prenorm_residual": true,
+  "residual_in_fp32": false,
+  "rms_norm_eps": 1e-05,
+  "sliding_window": null,
+  "ssm_state_size": 128,
+  "tie_word_embeddings": false,
+  "time_step_floor": 0.0001,
+  "time_step_limit": [
+    0.0,
+    Infinity
+  ],
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
+  "time_step_rank": 256,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.2",
+  "use_bias": false,
+  "use_cache": true,
+  "use_conv_bias": true,
+  "use_mamba_kernels": true,
+  "vocab_size": 131072
+}