fix: harmony parser streaming fix (#3074)

Signed-off-by: ayushag <ayushag@nvidia.com> Signed-off-by: Graham King <grahamk@nvidia.com> Co-authored-by: Graham King <grahamk@nvidia.com>

fix: harmony parser streaming fix (#3074)
Signed-off-by: ayushag <ayushag@nvidia.com> Signed-off-by: Graham King <grahamk@nvidia.com> Co-authored-by: Graham King <grahamk@nvidia.com>
cd814377 · Ayush Agarwal · GitHub · 78a3feda · cd814377 · cd814377
Unverified Commit cd814377 authored Sep 17, 2025 by Ayush Agarwal Committed by GitHub Sep 17, 2025
7 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2080,6 +2080,7 @@ dependencies = [
 "rustpython-parser",
 "serde",
 "serde_json",
+ "tokio",
 "tracing",
 "uuid 1.18.0",
 ]

--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -653,17 +653,17 @@ impl OpenAIPreprocessor {
    }

    /// Apply tool calling jail to the stream using the preprocessor's tool call parser
-    pub fn apply_tool_calling_jail_with_parser(
+    pub async fn apply_tool_calling_jail_with_parser(
        &self,
        stream: ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
    ) -> ManyOut<Annotated<NvCreateChatCompletionStreamResponse>> {
-        apply_tool_calling_jail_internal(stream, self.tool_call_parser.clone())
+        apply_tool_calling_jail_internal(stream, self.tool_call_parser.clone()).await
    }
 }

 /// Apply tool calling jail to the stream - stops/jails the stream under certain conditions
 /// When jailed, the stream will be unjailed when the input stream ends
-pub fn apply_tool_calling_jail_internal(
+pub async fn apply_tool_calling_jail_internal(
    stream: ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
    tool_call_parser: Option<String>,
 ) -> ManyOut<Annotated<NvCreateChatCompletionStreamResponse>> {
@@ -677,6 +677,7 @@ pub fn apply_tool_calling_jail_internal(
        last_response_metadata: None,
        finished: false,
    };
+
    // Transform the stream using unfold to maintain state
    // Input: ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>
    // Returns None if the stream is finished
@@ -814,7 +815,9 @@ pub fn apply_tool_calling_jail_internal(
                        if let Ok((tool_calls, normal_text)) = try_tool_call_parse_aggregate(
                            accumulated_text,
                            state.tool_call_parser.as_deref(),
-                        ) {
+                        )
+                        .await
+                        {
                            // Found tool calls, create a final response with them
                            tracing::debug!(
                                "Parsed {} tool calls from accumulated content",
@@ -952,7 +955,7 @@ impl
        // transform the postprocessor stream
        let stream = Self::transform_postprocessor_stream(response_stream, response_generator);

-        let stream = self.apply_tool_calling_jail_with_parser(stream);
+        let stream = self.apply_tool_calling_jail_with_parser(stream).await;
        let context = stream.context();
        // prepend the annotations to the response stream
        let stream = annotations_stream.chain(stream);

--- a/lib/llm/tests/test_preprocessor.rs
+++ b/lib/llm/tests/test_preprocessor.rs
@@ -169,7 +169,7 @@ async fn test_apply_tool_calling_jail_internal_with_tool_call_detection() {

    // Apply the jail with nemotron_deci parser - should trigger jailing on first chunk
    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("nemotron_deci".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("nemotron_deci".to_string())).await;

    // Collect all results
    let results: Vec<_> = jailed_stream.collect().await;
@@ -225,7 +225,7 @@ async fn test_apply_tool_calling_jail_internal_no_tool_calls() {

    // Apply the jail with nemotron_deci parser - regular text should NOT be jailed
    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("nemotron_deci".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("nemotron_deci".to_string())).await;

    // Collect all results
    let results: Vec<_> = jailed_stream.collect().await;
@@ -276,7 +276,7 @@ async fn test_apply_tool_calling_jail_internal_with_empty_stream() {
    let input_stream = stream::iter(chunks);
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

-    let jailed_stream = apply_tool_calling_jail_internal(response_stream, None);
+    let jailed_stream = apply_tool_calling_jail_internal(response_stream, None).await;
    let results: Vec<_> = jailed_stream.collect().await;

    assert!(results.is_empty(), "Empty stream should produce no results");
@@ -300,7 +300,7 @@ async fn test_apply_tool_calling_jail_internal_with_different_parsers() {
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("hermes".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("hermes".to_string())).await;
    let results: Vec<_> = jailed_stream.collect().await;

    assert!(!results.is_empty(), "Should have results for hermes parser");
@@ -360,7 +360,7 @@ async fn test_apply_tool_calling_jail_internal_hermes_parser() {
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("hermes".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("hermes".to_string())).await;
    let results: Vec<_> = jailed_stream.collect().await;

    assert!(!results.is_empty(), "Should have results for hermes parser");
@@ -458,7 +458,7 @@ async fn test_apply_tool_calling_jail_internal_mistral_parser_with_no_tool_call_
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string())).await;

    let results: Vec<_> = jailed_stream.collect().await;

@@ -532,7 +532,7 @@ async fn test_apply_tool_calling_jail_internal_mistral_parser_with_false_positiv
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string())).await;
    let results: Vec<_> = jailed_stream.collect().await;

    assert!(
@@ -583,7 +583,7 @@ async fn test_apply_tool_calling_jail_internal_mistral_parser_with_false_positiv
    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());

    let jailed_stream =
-        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string()));
+        apply_tool_calling_jail_internal(response_stream, Some("mistral".to_string())).await;
    let results: Vec<_> = jailed_stream.collect().await;

    assert!(
@@ -635,3 +635,77 @@ async fn test_apply_tool_calling_jail_internal_mistral_parser_with_false_positiv
    assert_eq!(arguments["location"], "San Francisco");
    assert_eq!(arguments["unit"], "fahrenheit");
 }
+
+#[tokio::test]
+async fn test_tool_calling_jail_internal_with_harmony_parser() {
+    let mock_context = Arc::new(MockAsyncEngineContext::new(
+        "test-request-id-harmony".to_string(),
+    ));
+
+    // Harmony Format:
+    // <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
+    // <|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
+    // <|message|>{"location":"San Francisco"}<|call|>
+    let chunks = vec![
+        create_mock_response_chunk(
+            "<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>"
+                .to_string(),
+            0,
+        ),
+        create_mock_response_chunk("<|start|>".to_string(), 0),
+        create_mock_response_chunk("assistant".to_string(), 0),
+        create_mock_response_chunk("<|channel|>".to_string(), 0),
+        create_mock_response_chunk(
+            "commentary to=functions.get_current_weather <|constrain|>json".to_string(),
+            0,
+        ),
+        create_mock_response_chunk(
+            "<|message|>{\"location\":\"San Francisco\"}<|call|>".to_string(),
+            0,
+        ),
+        create_final_response_chunk(0),
+    ];
+
+    let input_stream = stream::iter(chunks);
+    let response_stream = ResponseStream::new(Box::pin(input_stream), mock_context.clone());
+
+    let jailed_stream =
+        apply_tool_calling_jail_internal(response_stream, Some("harmony".to_string())).await;
+    let results: Vec<_> = jailed_stream.collect().await;
+
+    assert!(
+        !results.is_empty(),
+        "Should have results for harmony parser"
+    );
+
+    assert_eq!(results.len(), 2);
+    assert_eq!(
+        results[1].data.as_ref().unwrap().choices[0].delta.content,
+        Some("Need to use function get_current_weather.".to_string())
+    );
+    assert!(
+        results[1].data.as_ref().unwrap().choices[0]
+            .delta
+            .tool_calls
+            .is_some()
+    );
+    let tools = results[1].data.as_ref().unwrap().choices[0]
+        .delta
+        .tool_calls
+        .as_ref()
+        .unwrap();
+    assert_eq!(tools.len(), 1);
+    let name = tools[0].function.as_ref().unwrap().name.as_ref().unwrap();
+    let arguments = serde_json::from_str::<serde_json::Value>(
+        tools[0]
+            .function
+            .as_ref()
+            .unwrap()
+            .arguments
+            .as_ref()
+            .unwrap(),
+    )
+    .unwrap();
+    assert_eq!(name, "get_current_weather");
+    assert_eq!(arguments["location"], "San Francisco");
+}
--- a/lib/parsers/Cargo.toml
+++ b/lib/parsers/Cargo.toml
@@ -29,6 +29,7 @@ anyhow = { workspace = true }
 dynamo-async-openai = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
+tokio = { workspace = true }
 tracing = { workspace = true }
 uuid = { workspace = true }

@@ -36,4 +37,4 @@ regex = "1"
 openai-harmony = "0.0.3"
 lazy_static = "1.5.0"
 rustpython-parser = "0.4.0"
-num-traits = "0.2"
\ No newline at end of file
+num-traits = "0.2"
--- a/lib/parsers/src/tool_calling/harmony/harmony_parser.rs
+++ b/lib/parsers/src/tool_calling/harmony/harmony_parser.rs
@@ -7,19 +7,27 @@ use openai_harmony::StreamableParser;
 use openai_harmony::chat::{Content::Text, Role};
 use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, load_harmony_encoding};
 use serde_json::Value;
-use std::sync::OnceLock;

-static GLOBAL_HARMONY_GPTOSS_ENCODING: OnceLock<Result<HarmonyEncoding, anyhow::Error>> =
-    OnceLock::new();
+static GLOBAL_HARMONY_GPTOSS_ENCODING: tokio::sync::OnceCell<
+    Result<HarmonyEncoding, anyhow::Error>,
+> = tokio::sync::OnceCell::const_new();

-pub fn get_harmony_encoding() -> &'static Result<HarmonyEncoding, anyhow::Error> {
+pub async fn get_harmony_encoding() -> &'static Result<HarmonyEncoding, anyhow::Error> {
    GLOBAL_HARMONY_GPTOSS_ENCODING
-        .get_or_init(|| load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss))
+        .get_or_init(|| async {
+            tokio::task::spawn_blocking(|| {
+                load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss)
+            })
+            .await
+            .map_err(anyhow::Error::msg)
+            .flatten()
+        })
+        .await
 }

 /// Parse tool calls from Harmony Format text
 /// <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|><|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}<|call|>
-pub fn parse_tool_calls_harmony(
+pub async fn parse_tool_calls_harmony(
    text: &str,
    config: &JsonParserConfig,
 ) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
@@ -29,7 +37,7 @@ pub fn parse_tool_calls_harmony(
    // Check if tool call start tokens are present, if not return everything as normal text
    // Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present
    // End Token: "<|call|>"
-    if !detect_tool_call_start_harmony(text, config) {
+    if !detect_tool_call_start_harmony(text, config, true) {
        return Ok((vec![], Some(trimmed)));
    }

@@ -43,7 +51,7 @@ pub fn parse_tool_calls_harmony(
        trimmed.push_str(end_token);
    }

-    let enc = match get_harmony_encoding().as_ref() {
+    let enc = match get_harmony_encoding().await.as_ref() {
        Ok(e) => e,
        Err(e) => {
            tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
@@ -154,15 +162,28 @@ pub fn parse_tool_calls_harmony(
    Ok((res, Some(normal_text.to_string())))
 }

-pub fn detect_tool_call_start_harmony(chunk: &str, config: &JsonParserConfig) -> bool {
+pub fn detect_tool_call_start_harmony(
+    chunk: &str,
+    config: &JsonParserConfig,
+    strict: bool,
+) -> bool {
    let trimmed = chunk.trim();
    if trimmed.is_empty() {
        return false;
    }
-    config
-        .tool_call_start_tokens
-        .iter()
-        .any(|token| trimmed.contains(token))
+
+    if strict {
+        config
+            .tool_call_start_tokens
+            .iter()
+            .any(|token| trimmed.contains(token))
+    } else {
+        config
+            .tool_call_start_tokens
+            .iter()
+            .any(|token| trimmed.contains(token))
+            || trimmed.contains("<|channel|>")
+    }
 }

 #[cfg(test)]
@@ -174,8 +195,8 @@ mod tests {
        (call.function.name, args)
    }

-    #[test]
-    fn test_parse_tool_calls_harmony_basic() {
+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_basic() {
        let text = r#"
 <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
 <|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
@@ -186,7 +207,7 @@ mod tests {
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
+        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
        assert_eq!(
            normal_content,
            Some("Need to use function get_current_weather.".to_string())
@@ -197,8 +218,8 @@ mod tests {
        assert_eq!(args["location"], "San Francisco");
    }

-    #[test]
-    fn test_parse_tools_harmony_without_start_token() {
+    #[tokio::test]
+    async fn test_parse_tools_harmony_without_start_token() {
        let text = r#"
 <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
 <|message|>{"location":"San Francisco"}<|call|>
@@ -208,13 +229,13 @@ mod tests {
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
+        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
        assert_eq!(normal_content, Some(text.trim().to_string()));
        assert_eq!(tool_calls.len(), 0);
    }

-    #[test]
-    fn test_parse_tool_calls_harmony_with_multi_args() {
+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_with_multi_args() {
        let text = r#"
        <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
        <|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
@@ -225,7 +246,7 @@ mod tests {
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
+        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
        assert_eq!(
            normal_content,
            Some("Need to use function get_current_weather.".to_string())
@@ -237,8 +258,8 @@ mod tests {
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_parse_tool_calls_harmony_with_normal_text() {
+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_with_normal_text() {
        let text = r#"
        <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
        <|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
@@ -249,7 +270,7 @@ mod tests {
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
+        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
        assert_eq!(
            normal_content,
            Some("Need to use function get_current_weather.".to_string())
@@ -260,15 +281,15 @@ mod tests {
        assert_eq!(args["location"], "San Francisco");
    }

-    #[test]
-    fn test_parse_tool_calls_harmony_without_call_token() {
+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_without_call_token() {
        let text = r#"<|channel|>analysis<|message|>We need to call get_weather function. The user asks "What's the weather like in San Francisco in Celsius?" So location: "San Francisco, CA" unit: "celsius". Let's call function.<|end|><|start|>assistant<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"location":"San Francisco, CA","unit":"celsius"}"#;
        let config = JsonParserConfig {
            tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()],
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
+        let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
        assert_eq!(normal_content, Some("We need to call get_weather function. The user asks \"What's the weather like in San Francisco in Celsius?\" So location: \"San Francisco, CA\" unit: \"celsius\". Let's call function.".to_string()));
        assert_eq!(tool_calls.len(), 1);
        let (name, args) = extract_name_and_args(tool_calls[0].clone());
@@ -290,19 +311,21 @@ mod detect_parser_tests {
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let result = detect_tool_call_start_harmony(text, &config);
+        let result = detect_tool_call_start_harmony(text, &config, false);
        assert!(result);
    }

    #[test]
    fn test_detect_tool_call_start_harmony_chunk_without_tool_call_start_token() {
+        // This is a workaround for now: in non-strict mode, any chunk containing `<|channel|>`
+        // is treated as a tool call start. We need to improve this in the future.
        let text = r#"<|channel|>commentary to=functions.get_current_weather"#;
        let config = JsonParserConfig {
            tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()],
            tool_call_end_tokens: vec!["<|call|>".to_string()],
            ..Default::default()
        };
-        let result = detect_tool_call_start_harmony(text, &config);
-        assert!(!result);
+        let result = detect_tool_call_start_harmony(text, &config, false);
+        assert!(result);
    }
 }
--- a/lib/parsers/src/tool_calling/parsers.rs
+++ b/lib/parsers/src/tool_calling/parsers.rs
@@ -32,7 +32,7 @@ pub fn get_available_tool_parsers() -> Vec<&'static str> {
    get_tool_parser_map().keys().copied().collect()
 }

-pub fn try_tool_call_parse(
+pub async fn try_tool_call_parse(
    message: &str,
    config: &ToolCallConfig,
 ) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
@@ -43,7 +43,7 @@ pub fn try_tool_call_parse(
            Ok((results, normal_content))
        }
        ToolCallParserType::Harmony => {
-            let (results, normal_content) = parse_tool_calls_harmony(message, &config.json)?;
+            let (results, normal_content) = parse_tool_calls_harmony(message, &config.json).await?;
            Ok((results, normal_content))
        }
        ToolCallParserType::Pythonic => {
@@ -60,7 +60,7 @@ pub fn try_tool_call_parse(
 }

 // Base Detector to call for all tool parsing
-pub fn detect_and_parse_tool_call(
+pub async fn detect_and_parse_tool_call(
    message: &str,
    parser_str: Option<&str>,
 ) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
@@ -75,7 +75,7 @@ pub fn detect_and_parse_tool_call(

    match parser_map.get(parser_key) {
        Some(config) => {
-            let (results, normal_content) = try_tool_call_parse(message, config)?;
+            let (results, normal_content) = try_tool_call_parse(message, config).await?;
            Ok((results, normal_content))
        }
        None => anyhow::bail!(
@@ -96,7 +96,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::
    match parser_map.get(parser_key) {
        Some(config) => match config.format {
            ToolCallParserType::Json => Ok(detect_tool_call_start_json(chunk, &config.json)),
-            ToolCallParserType::Harmony => Ok(detect_tool_call_start_harmony(chunk, &config.json)),
+            ToolCallParserType::Harmony => {
+                Ok(detect_tool_call_start_harmony(chunk, &config.json, false))
+            }
            ToolCallParserType::Pythonic => Ok(detect_tool_call_start_pythonic(chunk)),
            ToolCallParserType::Typescript => {
                anyhow::bail!("Typescript parser not implemented");
@@ -146,10 +148,12 @@ mod tests {
        }
    }

-    #[test]
-    fn parses_single_parameters_object() {
+    #[tokio::test]
+    async fn parses_single_parameters_object() {
        let input = r#"{ "name": "hello", "parameters": { "x": 1, "y": 2 } }"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -159,10 +163,12 @@ mod tests {
        assert_eq!(args["y"], 2);
    }

-    #[test]
-    fn parses_single_arguments_object() {
+    #[tokio::test]
+    async fn parses_single_arguments_object() {
        let input = r#"{ "name": "world", "arguments": { "a": "abc", "b": 42 } }"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -172,10 +178,12 @@ mod tests {
        assert_eq!(args["b"], 42);
    }

-    #[test]
-    fn parses_vec_of_parameters() {
+    #[tokio::test]
+    async fn parses_vec_of_parameters() {
        let input = r#"[{ "name": "first", "parameters": { "a": 1 } }, { "name": "second", "parameters": { "b": 2 } }]"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -187,10 +195,12 @@ mod tests {
        assert_eq!(args["b"], 2);
    }

-    #[test]
-    fn parses_vec_of_arguments() {
+    #[tokio::test]
+    async fn parses_vec_of_arguments() {
        let input = r#"[{ "name": "alpha", "arguments": { "a": "x" } }, { "name": "omega", "arguments": { "z": "y" } }]"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -202,11 +212,13 @@ mod tests {
        assert_eq!(args["z"], "y");
    }

-    #[test]
-    fn parses_toolcall_wrapped_payload() {
+    #[tokio::test]
+    async fn parses_toolcall_wrapped_payload() {
        let input =
            r#"<TOOLCALL>[{ "name": "wrapped", "parameters": { "foo": "bar" } }]</TOOLCALL>"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -215,8 +227,8 @@ mod tests {
        assert_eq!(args["foo"], "bar");
    }

-    #[test]
-    fn parses_python_tag_prefixed_payload() {
+    #[tokio::test]
+    async fn parses_python_tag_prefixed_payload() {
        let input = r#"<|python_tag|>{ "name": "pyfunc", "arguments": { "k": "v" } }"#;
        let (result, content) = try_tool_call_parse(
            input,
@@ -229,6 +241,7 @@ mod tests {
                },
            },
        )
+        .await
        .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
@@ -238,31 +251,37 @@ mod tests {
        assert_eq!(args["k"], "v");
    }

-    #[test]
-    fn returns_none_on_invalid_input() {
+    #[tokio::test]
+    async fn returns_none_on_invalid_input() {
        let input = r#"not even json"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("not even json".to_string()));
        assert!(result.is_empty());
    }

-    #[test]
-    fn returns_none_on_valid_json_wrong_shape() {
+    #[tokio::test]
+    async fn returns_none_on_valid_json_wrong_shape() {
        let input = r#"{ "foo": "bar" }"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some("{ \"foo\": \"bar\" }".to_string()));
        assert!(result.is_empty());
    }

    // Tests for real model outputs - disabled by default
-    #[test]
-    fn test_nvidia_llama3_nemotron_super_49b_simple() {
+    #[tokio::test]
+    async fn test_nvidia_llama3_nemotron_super_49b_simple() {
        let input = r#"<think>
 Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.
 </think>

 <TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]</TOOLCALL>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("nemotron_deci")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("nemotron_deci"))
+            .await
+            .unwrap();
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
        assert_eq!(content, Some("<think>\nOkay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.\n</think>".to_string()));
@@ -272,10 +291,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_nvidia_llama3_nemotron_super_49b_simple_with_no_think() {
+    #[tokio::test]
+    async fn test_nvidia_llama3_nemotron_super_49b_simple_with_no_think() {
        let input = r#"<TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]</TOOLCALL>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("nemotron_deci")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("nemotron_deci"))
+            .await
+            .unwrap();
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
        assert_eq!(content, Some("".to_string()));
@@ -285,15 +306,15 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_nvidia_llama3_nemotron_super_49b_with_function_array() {
+    #[tokio::test]
+    async fn test_nvidia_llama3_nemotron_super_49b_with_function_array() {
        let input = r#"<think>
 Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.
 </think>

 <TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]</TOOLCALL>"#;
        let config = ToolCallConfig::nemotron_deci();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("<think>\nOkay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.\n</think>".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -307,8 +328,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_nvidia_llama3_nemotron_super_49b_with_function_array_with_new_lines() {
+    #[tokio::test]
+    async fn test_nvidia_llama3_nemotron_super_49b_with_function_array_with_new_lines() {
        let input = r#"<think>
 Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.
 </think>
@@ -324,7 +345,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
  </TOOLCALL>
  "#;
        let config = ToolCallConfig::nemotron_deci();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("<think>\nOkay, the user is asking for the weather in San Francisco in Fahrenheit. Let me check the tools available.\n</think>".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -338,12 +359,14 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_qwen_qwq_32b_simple() {
+    #[tokio::test]
+    async fn test_qwen_qwq_32b_simple() {
        let input = r#"<tool_call>
 {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 </tool_call>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -353,23 +376,27 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_qwen_qwq_32b_simple_with_normal_text() {
+    #[tokio::test]
+    async fn test_qwen_qwq_32b_simple_with_normal_text() {
        let input = r#"Hey How are you? <tool_call>
 {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 </tool_call>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
    }

-    #[test]
-    fn test_nousresearch_hermes3_llama31_8b_simple() {
+    #[tokio::test]
+    async fn test_nousresearch_hermes3_llama31_8b_simple() {
        let input = r#"<tool_call>
 {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 </tool_call>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -379,8 +406,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_qwen_qwq_32b_multiple_tool_calls() {
+    #[tokio::test]
+    async fn test_qwen_qwq_32b_multiple_tool_calls() {
        let input = r#"<tool_call>
 {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 </tool_call>
@@ -389,7 +416,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
 </tool_call>
 "#;
        let config = ToolCallConfig::hermes();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -403,8 +430,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_qwen_qwq_32b_multiple_tool_calls_with_normal_text() {
+    #[tokio::test]
+    async fn test_qwen_qwq_32b_multiple_tool_calls_with_normal_text() {
        let input = r#"Hey How are you? <tool_call>
 {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 </tool_call>
@@ -413,7 +440,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
 </tool_call>
 "#;
        let config = ToolCallConfig::hermes();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -427,8 +454,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_qwen_qwq_32b_multiple_tool_calls_with_new_lines() {
+    #[tokio::test]
+    async fn test_qwen_qwq_32b_multiple_tool_calls_with_new_lines() {
        let input = r#"<tool_call>
 {"name": "get_weather",
 "arguments": {"location": "San Francisco, CA",
@@ -441,7 +468,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
 </tool_call>
 "#;
        let config = ToolCallConfig::hermes();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -455,9 +482,9 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
+    #[tokio::test]
    #[ignore]
-    fn test_ibm_granite_40_tiny_preview_simple() {
+    async fn test_ibm_granite_40_tiny_preview_simple() {
        let input = r#"[{"arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}, "name": "get_weather"}]"#;
        let config = ToolCallConfig {
            format: ToolCallParserType::Json,
@@ -468,7 +495,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
                ..Default::default()
            },
        };
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -478,11 +505,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_simple() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_simple() {
        let input = r#" [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -492,11 +519,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_simple_with_normal_text() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_simple_with_normal_text() {
        let input = r#"Hey How are you? [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -506,8 +533,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_simple_with_new_lines() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_simple_with_new_lines() {
        let input = r#"
        [{"name": "get_weather",
        "arguments": {"location":
@@ -515,7 +542,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        "unit": "fahrenheit"}}]
        "#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -525,11 +552,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_multiple() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_multiple() {
        let input = r#" [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -543,11 +570,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_multiple_with_normal_text() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_multiple_with_normal_text() {
        let input = r#"Hey How are you? [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -561,8 +588,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_multiple_with_new_lines() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_multiple_with_new_lines() {
        let input = r#"
        [{"name": "get_weather",
        "arguments": {"location":
@@ -573,7 +600,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        "fahrenheit"}}]
        "#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -587,11 +614,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token() {
        let input = r#"[TOOL_CALLS] [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -601,11 +628,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_with_normal_text() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_with_normal_text() {
        let input = r#"Hey How are you? [TOOL_CALLS] [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -615,8 +642,8 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_tokenwith_new_lines() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_tokenwith_new_lines() {
        let input = r#"
        [TOOL_CALLS]
        [{"name": "get_weather",
@@ -625,7 +652,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        "unit": "fahrenheit"}}]
        "#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -635,11 +662,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple() {
        let input = r#"[TOOL_CALLS] [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -653,11 +680,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple_with_normal_text() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple_with_normal_text()
+     {
        let input = r#"Hey How are you? [TOOL_CALLS] [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -671,8 +699,9 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple_with_new_lines() {
+    #[tokio::test]
+    async fn test_mistralai_mistral_7b_instruct_v03_single_with_start_token_multiple_with_new_lines()
+     {
        let input = r#"
        [TOOL_CALLS]
        [{"name": "get_weather",
@@ -684,7 +713,7 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        "fahrenheit"}}]
        "#;
        let config = ToolCallConfig::mistral();
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -698,10 +727,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_simple() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_simple() {
        let input = r#"{"name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit"}}"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -711,10 +742,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_simple_with_normal_text() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_simple_with_normal_text() {
        let input = r#"Hey How are you? {"name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit"}}"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral())
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -724,13 +757,15 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_with_new_lines() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_with_new_lines() {
        let input = r#"
        {"name": "get_weather",
        "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -740,10 +775,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_with_python_tag() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_with_python_tag() {
        let input = r#"<|python_tag|>{ "name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -753,10 +790,12 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_with_python_tag_with_normal_text() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_with_python_tag_with_normal_text() {
        let input = r#"Hey How are you? <|python_tag|>{ "name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -766,13 +805,15 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_with_python_tag_with_new_lines() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_with_python_tag_with_new_lines() {
        let input = r#"
        <|python_tag|>
        {"name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -782,15 +823,17 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_meta_llama_llama31_8b_instruct_with_python_tag_multiple_with_new_lines() {
+    #[tokio::test]
+    async fn test_meta_llama_llama31_8b_instruct_with_python_tag_multiple_with_new_lines() {
        let input = r#"
        <|python_tag|>
        {"name": "get_weather", "parameters": {"location": "San Francisco, CA", "unit": "fahrenheit" }}
        <|python_tag|>
        {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "fahrenheit" }}
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("llama3_json"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -804,11 +847,11 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_error_handling() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_error_handling() {
        // Unknown parser string should return an error
        let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco, CA"}}"#;
-        let result = detect_and_parse_tool_call(input, Some("unknown_parser"));
+        let result = detect_and_parse_tool_call(input, Some("unknown_parser")).await;
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
@@ -819,33 +862,39 @@ Okay, the user is asking for the weather in San Francisco in Fahrenheit. Let me

        // Known parser, but invalid input (not JSON) should return Ok(None)
        let input = "not a json";
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("not a json".to_string()));
        assert!(result.is_empty());

        // Known parser, but valid JSON with wrong shape should return Ok(None)
        let input = r#"{"foo": "bar"}"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some(r#"{"foo": "bar"}"#.to_string()));
        assert!(result.is_empty());
    }

-    #[test]
+    #[tokio::test]
    #[ignore]
-    fn test_internlm_internlm2_5_7b_chat_simple() {
+    async fn test_internlm_internlm2_5_7b_chat_simple() {
        let input = r#"San Francisco's weather is known for its mild climate with plenty of fog, especially along the coast. Here's an overview of the weather in Fahrenheit:

 - **Summer (June to August)**: Average highs range from the mid-60s to low 70s Fahrenheit, with cooler mornings and evenings. Coastal areas may be cooler than inland spots.

 Remember, San Francisco weather can be quite unpredictable, particularly with its famous fog, which can significantly lower temperatures. Always check a local weather forecast for the most accurate and up-to-date information."#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::default())
+            .await
+            .unwrap();
        assert_eq!(content, Some(input.to_string()));
        assert!(result.is_empty()); // This model doesn't produce tool calls
    }

-    #[test]
+    #[tokio::test]
    #[ignore]
-    fn test_ai21labs_ai21_jamba_15_mini_simple() {
+    async fn test_ai21labs_ai21_jamba_15_mini_simple() {
        let input = r#" [
    {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}
 ]"#;
@@ -858,7 +907,7 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
                ..Default::default()
            },
        };
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -868,9 +917,9 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
+    #[tokio::test]
    #[ignore]
-    fn test_salesforce_llama_xlam_2_8b_fc_r_simple() {
+    async fn test_salesforce_llama_xlam_2_8b_fc_r_simple() {
        let input = r#"[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]"#;
        let config = ToolCallConfig {
            format: ToolCallParserType::Json,
@@ -881,7 +930,7 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
                ..Default::default()
            },
        };
-        let (result, content) = try_tool_call_parse(input, &config).unwrap();
+        let (result, content) = try_tool_call_parse(input, &config).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -891,10 +940,10 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_nemotron_deci() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_nemotron_deci() {
        let input = r#"<TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}]</TOOLCALL>"#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -904,10 +953,10 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_nemotron_deci_multiple() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_nemotron_deci_multiple() {
        let input = r#"<TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]</TOOLCALL>"#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -921,10 +970,11 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_nemotron_deci_multiple_with_normal_text() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_nemotron_deci_multiple_with_normal_text()
+     {
        let input = r#"Hey How are you? <TOOLCALL>[{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}, {"name": "get_weather", "arguments": {"location": "New York, NY", "unit": "fahrenheit"}}]</TOOLCALL>"#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 2);
@@ -938,10 +988,10 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag() {
        let input = r#"<|python_tag|>{ "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -951,11 +1001,11 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag_with_normal_text()
-    {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag_with_normal_text()
+     {
        let input = r#"Hey How are you? <|python_tag|>{ "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -965,8 +1015,9 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag_with_new_lines() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_with_python_tag_with_new_lines()
+     {
        let input = r#"
        <|python_tag|>
        {"name":
@@ -975,7 +1026,7 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
          {"location": "San Francisco, CA",
          "unit": "fahrenheit" }}
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -985,15 +1036,15 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag_multiple_with_new_lines()
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag_multiple_with_new_lines()
     {
        let input = r#"
        {"name": "get_weather", "arguments":
         {"location": "San Francisco, CA",
          "unit": "fahrenheit" }}
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, None).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, None).await.unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -1003,10 +1054,12 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag() {
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag() {
        let input = r#"{ "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral())
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -1016,11 +1069,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag_with_normal_text()
+    #[tokio::test]
+    async fn test_detect_and_parse_tool_call_default_parser_llama3_json_without_python_tag_with_normal_text()
     {
        let input = r#"Hey How are you? { "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit" } }"#;
-        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral()).unwrap();
+        let (result, content) = try_tool_call_parse(input, &ToolCallConfig::mistral())
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert!(!result.is_empty());
        assert_eq!(result.len(), 1);
@@ -1030,11 +1085,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_phi4_single_function_call() {
+    #[tokio::test]
+    async fn test_phi4_single_function_call() {
        let input =
            r#"functools[{"name": "get_country_capital", "arguments": {"country": "Poland"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1042,10 +1099,12 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["country"], "Poland");
    }

-    #[test]
-    fn test_phi4_single_function_call_with_normal_text() {
+    #[tokio::test]
+    async fn test_phi4_single_function_call_with_normal_text() {
        let input = r#"Hey How are you? functools[{"name": "get_country_capital", "arguments": {"country": "Poland"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1053,13 +1112,15 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["country"], "Poland");
    }

-    #[test]
-    fn test_phi4_multiple_function_calls_simple_arguments() {
+    #[tokio::test]
+    async fn test_phi4_multiple_function_calls_simple_arguments() {
        let input = r#"functools[
  {"name": "get_country_capital", "arguments": {"country": "Poland"}},
  {"name": "get_population", "arguments": {"city": "Warsaw"}}
 ]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 2);

@@ -1072,13 +1133,15 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args2["city"], "Warsaw");
    }

-    #[test]
-    fn test_phi4_multiple_function_calls_simple_arguments_with_normal_text() {
+    #[tokio::test]
+    async fn test_phi4_multiple_function_calls_simple_arguments_with_normal_text() {
        let input = r#"Hey How are you? functools[
  {"name": "get_country_capital", "arguments": {"country": "Poland"}},
  {"name": "get_population", "arguments": {"city": "Warsaw"}}
 ]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert_eq!(result.len(), 2);

@@ -1091,12 +1154,14 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args2["city"], "Warsaw");
    }

-    #[test]
-    fn test_phi4_single_function_call_nested_json_arguments() {
+    #[tokio::test]
+    async fn test_phi4_single_function_call_nested_json_arguments() {
        let input = r#"functools[{"name": "get_weather_forecast", "arguments":
        {"location": {"city": "San Francisco",
        "state": "CA"}, "date": "2023-10-05"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1106,12 +1171,14 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["location"]["state"], "CA");
    }

-    #[test]
-    fn test_phi4_single_function_call_nested_json_arguments_with_normal_text() {
+    #[tokio::test]
+    async fn test_phi4_single_function_call_nested_json_arguments_with_normal_text() {
        let input = r#"Hey How are you? functools[{"name": "get_weather_forecast", "arguments":
        {"location": {"city": "San Francisco",
        "state": "CA"}, "date": "2023-10-05"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1121,11 +1188,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["location"]["state"], "CA");
    }

-    #[test]
-    fn test_phi4_function_call_with_parameters_instead_of_arguments() {
+    #[tokio::test]
+    async fn test_phi4_function_call_with_parameters_instead_of_arguments() {
        let input = r#"functools[{"name": "calculate_distance",
         "parameters": {"from": "New York", "to": "Los Angeles"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1134,11 +1203,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["to"], "Los Angeles");
    }

-    #[test]
-    fn test_phi4_function_call_with_parameters_instead_of_arguments_with_normal_text() {
+    #[tokio::test]
+    async fn test_phi4_function_call_with_parameters_instead_of_arguments_with_normal_text() {
        let input = r#"Hey How are you? functools[{"name": "calculate_distance",
         "parameters": {"from": "New York", "to": "Los Angeles"}}]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("phi4")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("phi4"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1147,10 +1218,12 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["to"], "Los Angeles");
    }

-    #[test]
-    fn test_pythonic_parser_basic_with_constants() {
+    #[tokio::test]
+    async fn test_pythonic_parser_basic_with_constants() {
        let input = r#"[get_weather(location="San Francisco", unit="fahrenheit"), get_weather(location="New York", unit="fahrenheit")]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("pythonic")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("pythonic"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 2);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1163,11 +1236,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
+    #[tokio::test]
    #[ignore]
-    fn test_pythonic_parser_with_constants_and_normal_text() {
+    async fn test_pythonic_parser_with_constants_and_normal_text() {
        let input = r#"Hey How are you? [get_weather(location="San Francisco", unit="fahrenheit"), get_weather(location="New York", unit="fahrenheit")]"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("pythonic")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("pythonic"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("Hey How are you?".to_string()));
        assert_eq!(result.len(), 2);

@@ -1181,14 +1256,16 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_harmony_parser_basic() {
+    #[tokio::test]
+    async fn test_harmony_parser_basic() {
        let input = r#"
        <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
        <|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
        <|message|>{"location":"San Francisco", "unit":"fahrenheit"}<|call|>
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("harmony")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("harmony"))
+            .await
+            .unwrap();
        assert_eq!(
            content,
            Some("Need to use function get_current_weather.".to_string())
@@ -1200,10 +1277,12 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["unit"], "fahrenheit");
    }

-    #[test]
-    fn test_deepseek_v3_1_parser_basic() {
+    #[tokio::test]
+    async fn test_deepseek_v3_1_parser_basic() {
        let input = r#"<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_current_weather<｜tool▁sep｜>{"location": "Tokyo"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_current_weather<｜tool▁sep｜>{"location": "Paris"}<｜tool▁call▁end｜><｜tool▁calls▁end｜><｜end▁of▁sentence｜>"#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("deepseek_v3_1")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("deepseek_v3_1"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 2);
        let (name, args) = extract_name_and_args(result[0].clone());
@@ -1214,11 +1293,13 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
        assert_eq!(args["location"], "Paris");
    }

-    #[test]
-    fn test_hermes_parser_without_new_line() {
+    #[tokio::test]
+    async fn test_hermes_parser_without_new_line() {
        let input = r#"<tool_call>{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "celsius"}}</tool_call>"
        "#;
-        let (result, content) = detect_and_parse_tool_call(input, Some("hermes")).unwrap();
+        let (result, content) = detect_and_parse_tool_call(input, Some("hermes"))
+            .await
+            .unwrap();
        assert_eq!(content, Some("".to_string()));
        assert_eq!(result.len(), 1);
        let (name, args) = extract_name_and_args(result[0].clone());

--- a/lib/parsers/src/tool_calling/tools.rs
+++ b/lib/parsers/src/tool_calling/tools.rs
@@ -7,7 +7,7 @@ pub use super::parsers::detect_and_parse_tool_call;
 /// Try parsing a string as a structured tool call, for aggregation usage.
 ///
 /// If successful, returns a `ChatCompletionMessageToolCall`.
-pub fn try_tool_call_parse_aggregate(
+pub async fn try_tool_call_parse_aggregate(
    message: &str,
    parser_str: Option<&str>,
 ) -> anyhow::Result<(
@@ -19,7 +19,7 @@ pub fn try_tool_call_parse_aggregate(
    } else {
        tracing::info!("Using tool parser: {:?}", parser_str);
    }
-    let (parsed, content) = detect_and_parse_tool_call(message, parser_str)?;
+    let (parsed, content) = detect_and_parse_tool_call(message, parser_str).await?;
    if parsed.is_empty() {
        return Ok((vec![], content));
    }
@@ -44,14 +44,14 @@ pub fn try_tool_call_parse_aggregate(
 /// Try parsing a string as a structured tool call, for streaming (delta) usage.
 ///
 /// If successful, returns a `ChatCompletionMessageToolCallChunk`.
-pub fn try_tool_call_parse_stream(
+pub async fn try_tool_call_parse_stream(
    message: &str,
    parser_str: Option<&str>,
 ) -> anyhow::Result<(
    Vec<dynamo_async_openai::types::ChatCompletionMessageToolCallChunk>,
    Option<String>,
 )> {
-    let (parsed, content) = detect_and_parse_tool_call(message, parser_str)?;
+    let (parsed, content) = detect_and_parse_tool_call(message, parser_str).await?;
    if parsed.is_empty() {
        return Ok((vec![], content));
    }