Unverified Commit b1186aee authored by Graham King, committed by GitHub
Browse files

fix: Handle invalid JSON in config.json (#3043)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 7af49a15
...@@ -2017,6 +2017,7 @@ dependencies = [ ...@@ -2017,6 +2017,7 @@ dependencies = [
"humantime", "humantime",
"insta", "insta",
"itertools 0.14.0", "itertools 0.14.0",
"json-five",
"lazy_static", "lazy_static",
"memmap2", "memmap2",
"minijinja", "minijinja",
...@@ -3933,6 +3934,16 @@ dependencies = [ ...@@ -3933,6 +3934,16 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "json-five"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56bf719068ddd382e66ee32cf044805aa8203bf9999b5af007bd0367fb681c4d"
dependencies = [
"serde",
"unicode-general-category",
]
[[package]] [[package]]
name = "jsonschema" name = "jsonschema"
version = "0.17.1" version = "0.17.1"
...@@ -8725,6 +8736,12 @@ version = "2.8.1" ...@@ -8725,6 +8736,12 @@ version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]]
name = "unicode-general-category"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24adfe8311434967077a6adff125729161e6e4934d76f6b7c55318ac5c9246d3"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.18" version = "1.0.18"
......
...@@ -147,6 +147,7 @@ erased-serde = { version = "0.4" } ...@@ -147,6 +147,7 @@ erased-serde = { version = "0.4" }
itertools = { version = "0.14.0" } itertools = { version = "0.14.0" }
minijinja = { version = "2.10.2", features = ["loader"] } minijinja = { version = "2.10.2", features = ["loader"] }
minijinja-contrib = { version = "2.10.2", features = ["pycompat"] } minijinja-contrib = { version = "2.10.2", features = ["pycompat"] }
json-five = { version = "0.3" }
# GGUF # GGUF
ggus = "0.4.0" ggus = "0.4.0"
......
...@@ -99,6 +99,56 @@ pub fn file_json_field<T: serde::de::DeserializeOwned>( ...@@ -99,6 +99,56 @@ pub fn file_json_field<T: serde::de::DeserializeOwned>(
}) })
} }
/// Pretty-print the part of a JSON document that failed to parse.
///
/// Emits one `tracing::error!` event containing up to two lines before the
/// failing line, the failing line itself (prefixed with `>> `), a `^` marker
/// under the reported column, and up to two lines after it.
///
/// Errors that are neither syntax nor data errors (per `err.is_syntax()` /
/// `err.is_data()`) are ignored, because they do not relate to the content of
/// the file.
///
/// # Arguments
///
/// * `filename` - Name shown in the log message; it is not opened or read.
/// * `json` - The full text that was handed to the parser.
/// * `err` - The error the parser returned for `json`.
pub fn log_json_err(filename: &str, json: &str, err: &serde_json::Error) {
    const ERROR_PREFIX: &str = ">> ";
    // Number of lines of context shown on each side of the failing line.
    const CONTEXT_LINES: usize = 2;

    // Only log errors that relate to the content of the JSON file
    if !(err.is_syntax() || err.is_data()) {
        return;
    }

    // These are 1 based for humans so subtract. Saturating, so a reported
    // position of 0 stays at 0 instead of underflowing.
    let line = err.line().saturating_sub(1);
    let column = err.column().saturating_sub(1);

    let json_lines: Vec<&str> = json.lines().collect();
    if json_lines.is_empty() {
        tracing::error!("JSON parsing error in {filename}: File is empty.");
        return;
    }

    // Two lines before. `line` is unsigned, so a plain `line - 2` underflows
    // whenever the error is on the first or second line; clamp at zero instead.
    let start_index = line.saturating_sub(CONTEXT_LINES);
    // The problem line and two lines after, clamped to the end of the file.
    let end_index = line
        .saturating_add(CONTEXT_LINES + 1)
        .min(json_lines.len());

    // Collect the context. When the reported line lies past the end of the
    // file, `start_index >= end_index` and this yields nothing.
    let mut context_lines: Vec<String> = json_lines
        .iter()
        .enumerate()
        .take(end_index)
        .skip(start_index)
        .map(|(i, text)| {
            if i == line {
                format!("{ERROR_PREFIX}{text}")
            } else {
                // Six places because tokenizer.json is very long
                format!("{:06} {}", i + 1, text)
            }
        })
        .collect();

    // Insert the column indicator directly below the failing line. The padding
    // accounts for the prefix that was prepended to that line.
    let col_indicator = "_".repeat(column + ERROR_PREFIX.len()) + "^";
    // Cannot underflow: `start_index <= line` by construction.
    let error_in_context_idx = line - start_index;
    if error_in_context_idx < context_lines.len() {
        context_lines.insert(error_in_context_idx + 1, col_indicator);
    }

    tracing::error!(
        "JSON parsing error in {filename}: Line {}, column {}:\n{}",
        err.line(),
        err.column(),
        context_lines.join("\n")
    );
}
#[cfg(test)] #[cfg(test)]
mod file_json_field_tests { mod file_json_field_tests {
use super::file_json_field; use super::file_json_field;
......
...@@ -174,12 +174,16 @@ impl ModelDeploymentCard { ...@@ -174,12 +174,16 @@ impl ModelDeploymentCard {
/// Load a model deployment card from a JSON file /// Load a model deployment card from a JSON file
pub fn load_from_json_file<P: AsRef<Path>>(file: P) -> std::io::Result<Self> { pub fn load_from_json_file<P: AsRef<Path>>(file: P) -> std::io::Result<Self> {
Ok(serde_json::from_str(&std::fs::read_to_string(file)?)?) let contents = std::fs::read_to_string(&file)?;
Ok(serde_json::from_str(&contents).inspect_err(|err| {
crate::log_json_err(&file.as_ref().display().to_string(), &contents, err)
})?)
} }
/// Load a model deployment card from a JSON string /// Load a model deployment card from a JSON string
pub fn load_from_json_str(json: &str) -> Result<Self, anyhow::Error> { pub fn load_from_json_str(contents: &str) -> Result<Self, anyhow::Error> {
Ok(serde_json::from_str(json)?) Ok(serde_json::from_str(contents)
.inspect_err(|err| crate::log_json_err("unknown", contents, err))?)
} }
// //
...@@ -227,7 +231,15 @@ impl ModelDeploymentCard { ...@@ -227,7 +231,15 @@ impl ModelDeploymentCard {
let p = checked_file.path().ok_or_else(|| let p = checked_file.path().ok_or_else(||
anyhow::anyhow!("Tokenizer is URL-backed ({:?}); call move_from_nats() before tokenizer_hf()", checked_file.url()) anyhow::anyhow!("Tokenizer is URL-backed ({:?}); call move_from_nats() before tokenizer_hf()", checked_file.url())
)?; )?;
HfTokenizer::from_file(p).map_err(anyhow::Error::msg) HfTokenizer::from_file(p)
.inspect_err(|err| {
if let Some(serde_err) = err.downcast_ref::<serde_json::Error>()
&& let Ok(contents) = std::fs::read_to_string(p)
{
crate::log_json_err(&p.display().to_string(), &contents, serde_err);
}
})
.map_err(anyhow::Error::msg)
} }
Some(TokenizerKind::GGUF(t)) => Ok(*t.clone()), Some(TokenizerKind::GGUF(t)) => Ok(*t.clone()),
None => { None => {
...@@ -627,11 +639,18 @@ impl HFConfig { ...@@ -627,11 +639,18 @@ impl HFConfig {
fn from_json_file<P: AsRef<Path>>(file: P) -> Result<Arc<dyn ModelInfo>> { fn from_json_file<P: AsRef<Path>>(file: P) -> Result<Arc<dyn ModelInfo>> {
let file_path = file.as_ref(); let file_path = file.as_ref();
let contents = std::fs::read_to_string(file_path)?; let contents = std::fs::read_to_string(file_path)?;
let mut config: Self = serde_json::from_str(&contents)?; let mut config: Self = json_five::from_str(&contents)
.inspect_err(|err| {
tracing::error!(path=%file_path.display(), %err, "Failed to parse config.json as JSON5");
})?;
if config.text_config.is_none() { if config.text_config.is_none() {
let text_config: HFTextConfig = serde_json::from_str(&contents)?; let text_config: HFTextConfig = json_five::from_str(&contents)
.inspect_err(|err| {
tracing::error!(path=%file_path.display(), %err, "Failed to parse text config from config.json as JSON5");
})?;
config.text_config = Some(text_config); config.text_config = Some(text_config);
} }
// Sometimes bos_token_id is in generation_config.json not config.json // Sometimes bos_token_id is in generation_config.json not config.json
let Some(text_config) = config.text_config.as_mut() else { let Some(text_config) = config.text_config.as_mut() else {
anyhow::bail!( anyhow::bail!(
...@@ -882,4 +901,14 @@ mod tests { ...@@ -882,4 +901,14 @@ mod tests {
assert_eq!(config.bos_token_id(), 200000); assert_eq!(config.bos_token_id(), 200000);
Ok(()) Ok(())
} }
/// Python's JSON parser tolerates `Infinity` as a number even though the JSON spec forbids
/// it, and people inevitably rely on that, so we have to allow it too.
/// The file is read as JSON5 (a lenient superset of JSON) so that it can be parsed.
#[test]
fn test_invalid_json_but_py_accepts_it() {
    dynamo_runtime::logging::init();
    let config_path = "tests/data/sample-models/NVIDIA-Nemotron-Nano-12B-v2-Base/config.json";
    // Only successful loading matters here; the parsed config itself is discarded.
    let _ = HFConfig::from_json_file(config_path).unwrap();
}
} }
...@@ -30,9 +30,12 @@ impl PromptFormatter { ...@@ -30,9 +30,12 @@ impl PromptFormatter {
mdc.display_name mdc.display_name
); );
}; };
let content = std::fs::read_to_string(file) let contents = std::fs::read_to_string(file)
.with_context(|| format!("fs:read_to_string '{}'", file.display()))?; .with_context(|| format!("fs:read_to_string '{}'", file.display()))?;
let mut config: ChatTemplate = serde_json::from_str(&content)?; let mut config: ChatTemplate =
serde_json::from_str(&contents).inspect_err(|err| {
crate::log_json_err(&file.display().to_string(), &contents, err)
})?;
// Some HF model (i.e. meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8) // Some HF model (i.e. meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8)
// stores the chat template in a separate file, we check if the file exists and // stores the chat template in a separate file, we check if the file exists and
......
// SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -24,7 +15,8 @@ pub struct RequestTemplate { ...@@ -24,7 +15,8 @@ pub struct RequestTemplate {
impl RequestTemplate { impl RequestTemplate {
pub fn load(path: &Path) -> Result<Self> { pub fn load(path: &Path) -> Result<Self> {
let template = std::fs::read_to_string(path)?; let template = std::fs::read_to_string(path)?;
let template: Self = serde_json::from_str(&template)?; let template: Self = serde_json::from_str(&template)
.inspect_err(|err| crate::log_json_err(&path.display().to_string(), &template, err))?;
Ok(template) Ok(template)
} }
} }
{
"architectures": [
"NemotronHForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_nemotron_h.NemotronHConfig",
"AutoModelForCausalLM": "modeling_nemotron_h.NemotronHForCausalLM"
},
"bos_token_id": 1,
"chunk_size": 128,
"conv_kernel": 4,
"eos_token_id": 2,
"head_dim": 128,
"hidden_dropout": 0.0,
"hidden_size": 5120,
"hybrid_override_pattern": "M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M-",
"initializer_range": 0.02,
"intermediate_size": 20480,
"layer_norm_epsilon": 1e-05,
"mamba_head_dim": 80,
"mamba_hidden_act": "silu",
"mamba_num_groups": 8,
"mamba_num_heads": 128,
"mamba_proj_bias": false,
"mamba_state_dim": 128,
"max_position_embeddings": 131072,
"mlp_bias": false,
"mlp_hidden_act": "relu2",
"model_type": "nemotron_h",
"n_groups": 8,
"num_attention_heads": 40,
"num_hidden_layers": 62,
"num_key_value_heads": 8,
"num_logits_to_keep": 1,
"num_query_groups": 8,
"pad_token_id": 0,
"rescale_prenorm_residual": true,
"residual_in_fp32": false,
"rms_norm_eps": 1e-05,
"sliding_window": null,
"ssm_state_size": 128,
"tie_word_embeddings": false,
"time_step_floor": 0.0001,
"time_step_limit": [
0.0,
Infinity
],
"time_step_max": 0.1,
"time_step_min": 0.001,
"time_step_rank": 256,
"torch_dtype": "bfloat16",
"transformers_version": "4.53.2",
"use_bias": false,
"use_cache": true,
"use_conv_bias": true,
"use_mamba_kernels": true,
"vocab_size": 131072
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment