chore: Send llama.cpp logs to tracing crate (#1292)

Unify them with all our other logs so that we can filter them with DYN_LOG, and so that they will eventually go to log aggregation, etc.

chore: Send llama.cpp logs to tracing crate (#1292)
Unify them with all our other logs so that we can filter them with DYN_LOG, and so that they will eventually go to log aggregation, etc.
7bb21ee7 · Graham King · GitHub · 6ea08301 · 7bb21ee7
Unverified Commit 7bb21ee7 authored May 30, 2025 by Graham King Committed by GitHub May 30, 2025
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 1 deletion

lib/engines/llamacpp/src/lib.rs lib/engines/llamacpp/src/lib.rs +7 -1

No files found.
--- a/lib/engines/llamacpp/src/lib.rs
+++ b/lib/engines/llamacpp/src/lib.rs
@@ -4,7 +4,7 @@
 use std::{
    num::NonZeroU32,
    path::Path,
-    sync::{Arc, Mutex, OnceLock},
+    sync::{Arc, Mutex, Once, OnceLock},
 };

 use async_stream::stream;
@@ -20,6 +20,7 @@ use llama_cpp_2::{
    model::{params::LlamaModelParams, LlamaModel},
    sampling::LlamaSampler,
    token::LlamaToken,
+    LogOptions,
 };

 use dynamo_llm::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
@@ -36,6 +37,8 @@ const NUM_CONTEXTS: usize = 3;
 static LLAMA_CONTEXTS: [OnceLock<Mutex<ContextWrapper>>; NUM_CONTEXTS] =
    [OnceLock::new(), OnceLock::new(), OnceLock::new()];

+static LLAMA_CPP_LOG_REDIRECT: Once = Once::new();
+
 // Newtype to simplify LlamaContext lifetime
 #[derive(Debug)]
 struct ContextWrapper(LlamaContext<'static>);
@@ -66,6 +69,9 @@ impl LlamacppEngine {
        cancel_token: CancellationToken,
        model_config: &LocalModel,
    ) -> pipeline_error::Result<Self> {
+        LLAMA_CPP_LOG_REDIRECT.call_once(|| {
+            llama_cpp_2::send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
+        });
        let backend = LlamaBackend::init()?;
        let model = load_model(&backend, model_config.path())?;
        LLAMA_MODEL.set(model)?;