Skip to main content

cadmus_core/
logging.rs

1//! Structured logging infrastructure with JSON output and OpenTelemetry integration.
2//!
3//! This module provides logging functionality for Cadmus, including:
4//! - JSON-structured logs written to rotating files
5//! - Configurable log levels and filtering
6//! - Automatic log file cleanup based on retention policies
7//! - Optional OpenTelemetry export (when `otel` feature is enabled)
8//! - Unique run ID for correlating logs across a session
9//!
10//! # Architecture
11//!
12//! The logging system is built on the `tracing` crate ecosystem:
13//! - `tracing_subscriber` for composable logging layers
14//! - `tracing_appender` for non-blocking file I/O
15//! - JSON formatting for structured, machine-readable logs
16//! - `EnvFilter` for flexible log level control
17//!
18//! Each application run generates a unique Run ID (UUID v7) that appears in:
19//! - The log filename: `cadmus-<run_id>.json`
20//! - OpenTelemetry resource attributes
21//! - All log entries for correlation
22//!
23//! # Log File Management
24//!
25//! Log files are automatically managed:
26//! - Files are named with the run ID: `cadmus-<run_id>.json`
27//! - Older files are deleted when `max_files` limit is exceeded
28//! - Cleanup happens at initialization, keeping only the most recent files
29//!
30//! # Configuration
31//!
32//! Logging is configured via `LoggingSettings`:
33//!
34//! ```toml
35//! [logging]
36//! enabled = true
37//! level = "info"
38//! max-files = 3
39//! directory = "logs"
40//! otlp-endpoint = "http://localhost:4318"  # Optional
41//! ```
42//!
43//! The log level can be overridden with the `RUST_LOG` environment variable:
44//!
45//! ```bash
46//! RUST_LOG=debug ./cadmus
47//! RUST_LOG=cadmus::view=trace,info ./cadmus
48//! ```
49//!
50//! # Example Usage
51//!
52//! ```no_run
53//! use cadmus_core::device::CURRENT_DEVICE;
54//! use cadmus_core::settings::LoggingSettings;
55//! use cadmus_core::logging::{init_logging, shutdown_logging, get_run_id};
56//!
57//! let settings = LoggingSettings {
58//!     enabled: true,
59//!     level: "info".to_string(),
60//!     max_files: 3,
61//!     directory: "logs".into(),
62//!     otlp_endpoint: None,
63//!     pyroscope_endpoint: None,
64//!     enable_kern_log: false,
65//!     enable_dbus_log: false,
66//! };
67//!
68//! // Initialize at application startup
69//! let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
70//! init_logging(&settings, log_dir)?;
71//! eprintln!("Started with run ID: {}", get_run_id());
72//!
73//! // Use tracing macros throughout the application
74//! tracing::info!("Application started");
75//!
76//! // Shutdown at application exit (flushes buffers)
77//! shutdown_logging();
78//! # Ok::<(), anyhow::Error>(())
79//! ```
80
81use crate::settings::LoggingSettings;
82#[cfg(feature = "tracing")]
83use crate::telemetry;
84use crate::version::get_current_version;
85use anyhow::{Context, Error, ensure};
86use arc_swap::ArcSwap;
87use std::fs;
88use std::fs::DirEntry;
89use std::path::Path;
90use std::sync::mpsc;
91use std::sync::{Arc, Mutex, OnceLock};
92use std::thread;
93use std::time::Duration;
94use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
95use tracing_subscriber::EnvFilter;
96use tracing_subscriber::prelude::*;
97use uuid::Uuid;
98
99mod kern;
100
101const LOG_FILE_PREFIX: &str = "cadmus-";
102const LOG_FILE_SUFFIX: &str = "json";
103
104static LOG_GUARD: OnceLock<Mutex<Option<WorkerGuard>>> = OnceLock::new();
105static RUN_ID: OnceLock<String> = OnceLock::new();
106static WRITER_INNER: OnceLock<ArcSwap<NonBlocking>> = OnceLock::new();
107
108struct SwappableWriter {
109    inner: &'static ArcSwap<NonBlocking>,
110}
111
112impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SwappableWriter {
113    type Writer = NonBlocking;
114
115    /// Performs a lock-free atomic load of the current writer.
116    fn make_writer(&'a self) -> Self::Writer {
117        self.inner.load().as_ref().clone()
118    }
119}
120
121/// Returns the unique run ID for this application session.
122///
123/// The run ID is a UUID v7 generated at first access and remains constant
124/// for the lifetime of the process. It is used to:
125/// - Name the log file: `cadmus-<run_id>.json`
126/// - Tag OpenTelemetry telemetry exports
127/// - Correlate all operations within a single run
128///
129/// # Returns
130///
131/// A string slice containing the run ID, valid for the program's lifetime.
132///
133/// # Example
134///
135/// ```
136/// use cadmus_core::logging::get_run_id;
137///
138/// let run_id = get_run_id();
139/// eprintln!("Application run ID: {}", run_id);
140/// assert_eq!(get_run_id(), run_id); // Consistent across calls
141/// ```
142pub fn get_run_id() -> &'static str {
143    RUN_ID.get_or_init(|| Uuid::now_v7().to_string()).as_str()
144}
145
146/// Removes old log files to maintain the configured retention limit.
147///
148/// This function scans the log directory for files matching the pattern
149/// `cadmus-*.json` and deletes the oldest files if the count exceeds `max_files`.
150///
151/// Note: this relies on the run ID being a UUID v7 (time-ordered). Filenames are
152/// `cadmus-<run_id>.json` where `<run_id>` is generated with `Uuid::now_v7()`,
153/// so lexicographic sorting of the filenames corresponds to chronological order.
154/// Sorting by file name therefore yields oldest-first ordering for removal.
155///
156/// # Arguments
157///
158/// * `log_dir` - Path to the directory containing log files
159/// * `max_files` - Maximum number of log files to retain (0 = keep all)
160///
161/// # Returns
162///
163/// Returns `Ok(())` on success.
164///
165/// # Errors
166///
167/// Returns an error if:
168/// - The log directory cannot be read
169/// - Individual directory entries cannot be read
170/// - Old log files cannot be deleted
171fn cleanup_run_logs(log_dir: &std::path::Path, max_files: usize) -> Result<(), Error> {
172    if max_files == 0 {
173        return Ok(());
174    }
175
176    let mut entries = collect_run_log_entries(log_dir)?;
177    if entries.len() <= max_files {
178        return Ok(());
179    }
180
181    entries.sort_by_key(|entry| entry.file_name());
182    let remove_count = entries.len().saturating_sub(max_files);
183    for entry in entries.into_iter().take(remove_count) {
184        fs::remove_file(entry.path())
185            .with_context(|| format!("can't remove old log file {}", entry.path().display()))?;
186    }
187
188    Ok(())
189}
190
191/// Collects all Cadmus log file entries from the specified directory.
192///
193/// Only files matching the pattern `cadmus-*.json` are collected.
194///
195/// # Arguments
196///
197/// * `log_dir` - Path to the directory to scan
198///
199/// # Returns
200///
201/// Returns a vector of directory entries representing log files.
202///
203/// # Errors
204///
205/// Returns an error if the directory cannot be read or entries are inaccessible.
206fn collect_run_log_entries(log_dir: &std::path::Path) -> Result<Vec<DirEntry>, Error> {
207    let mut entries = Vec::new();
208    for entry in fs::read_dir(log_dir)
209        .with_context(|| format!("can't read log directory {}", log_dir.display()))?
210    {
211        let entry = entry.context("can't read log directory entry")?;
212        if is_run_log_entry(&entry) {
213            entries.push(entry);
214        }
215    }
216
217    Ok(entries)
218}
219
220/// Determines whether a directory entry is a Cadmus log file.
221///
222/// Returns `true` if the filename starts with `cadmus-` and ends with `.json`.
223///
224/// # Arguments
225///
226/// * `entry` - Directory entry to check
227///
228/// # Returns
229///
230/// `true` if the entry is a log file, `false` otherwise.
231fn is_run_log_entry(entry: &DirEntry) -> bool {
232    let file_name = entry.file_name();
233    let file_name = file_name.to_string_lossy();
234    if !file_name.starts_with(LOG_FILE_PREFIX) {
235        return false;
236    }
237
238    file_name.ends_with(LOG_FILE_SUFFIX)
239}
240
241/// Initializes the logging system with JSON output and optional OpenTelemetry export.
242///
243/// This function sets up the complete logging infrastructure:
244/// - Creates the log directory if it doesn't exist
245/// - Cleans up old log files based on retention policy
246/// - Configures a rolling file appender with non-blocking I/O
247/// - Applies log level filtering from settings or environment
248/// - Sets up JSON formatting for structured logs
249/// - Initializes tracing export if the `tracing` feature is enabled
250/// - Bridges `log::` records (e.g. from pyroscope-rs) into the tracing pipeline
251///   so they appear in Loki alongside tracing events via the tracing-log
252///   layer automatically enabled by tracing-subscriber.
253///
254/// The function should only be called once at application startup.
255/// The logging system remains active until `shutdown_logging()` is called.
256///
257/// # Arguments
258///
259/// * `settings` - Logging configuration including level, directory name, and
260///   retention settings.
261/// * `log_dir` - Absolute path to the directory where log files are written.
262///   The caller is responsible for computing this from
263///   [`Device::data_path`](crate::device::Device::data_path) so that logs land
264///   on the SD card when one is present. Pass
265///   `CURRENT_DEVICE.data_path(&settings.directory)` at call sites.
266///
267/// # Returns
268///
269/// Returns `Ok(())` on successful initialization.
270///
271/// # Errors
272///
273/// Returns an error if:
274/// - The log directory cannot be created
275/// - Log file cleanup fails
276/// - The rolling file appender cannot be initialized
277/// - The tracing subscriber cannot be initialized
278/// - OpenTelemetry initialization fails (when `otel` feature is enabled)
279///
280/// # Example
281///
282/// ```no_run
283/// use cadmus_core::device::CURRENT_DEVICE;
284/// use cadmus_core::settings::LoggingSettings;
285/// use cadmus_core::logging::init_logging;
286///
287/// let settings = LoggingSettings {
288///     enabled: true,
289///     level: "debug".to_string(),
290///     max_files: 5,
291///     directory: "logs".into(),
292///     otlp_endpoint: Some("http://localhost:4318".to_string()),
293///     pyroscope_endpoint: None,
294///     enable_kern_log: false,
295///     enable_dbus_log: false,
296/// };
297///
298/// let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
299/// init_logging(&settings, log_dir)?;
300/// # Ok::<(), anyhow::Error>(())
301/// ```
302pub fn init_logging(settings: &LoggingSettings, log_dir: std::path::PathBuf) -> Result<(), Error> {
303    if !settings.enabled {
304        return Ok(());
305    }
306
307    ensure!(
308        log_dir.is_absolute(),
309        "log_dir must be absolute, got {}",
310        log_dir.display()
311    );
312
313    fs::create_dir_all(&log_dir)
314        .with_context(|| format!("can't create log directory {}", &log_dir.display()))?;
315
316    cleanup_run_logs(&log_dir, settings.max_files)?;
317
318    let appender = tracing_appender::rolling::Builder::new()
319        .rotation(tracing_appender::rolling::Rotation::NEVER)
320        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
321        .filename_suffix(LOG_FILE_SUFFIX)
322        .max_log_files(settings.max_files)
323        .build(&log_dir)
324        .context("can't initialize rolling log file appender")?;
325
326    let (non_blocking, guard) = tracing_appender::non_blocking(appender);
327    let _ = LOG_GUARD.set(Mutex::new(Some(guard)));
328    let _ = WRITER_INNER.set(ArcSwap::new(Arc::new(non_blocking)));
329
330    let swappable = SwappableWriter {
331        inner: WRITER_INNER.get().expect("WRITER_INNER just set"),
332    };
333
334    let filter = build_filter(settings)?;
335
336    let fmt_layer = tracing_subscriber::fmt::layer()
337        .json()
338        .with_ansi(false)
339        .with_writer(swappable)
340        .with_current_span(true);
341
342    cfg_select! {
343        feature = "tracing" => {
344            let subscriber = tracing_subscriber::registry()
345                .with(filter)
346                .with(telemetry::tracing::init_telemetry(settings, get_run_id())?)
347                .with(fmt_layer);
348
349            subscriber
350                .try_init()
351                .context("can't initialize tracing subscriber")?;
352        }
353        _ => {
354            let subscriber = tracing_subscriber::registry().with(filter).with(fmt_layer);
355
356            subscriber
357                .try_init()
358                .context("can't initialize tracing subscriber")?;
359        }
360    }
361
362    eprintln!(
363        "Cadmus run started with ID: {} (version {})",
364        get_run_id(),
365        get_current_version()
366    );
367
368    #[cfg(feature = "test")]
369    if settings.enable_kern_log {
370        kern::spawn_kern_log_thread();
371    }
372
373    Ok(())
374}
375
376/// Gracefully shuts down the logging system and flushes buffered data.
377///
378/// This function ensures all buffered log data is written to disk and, if enabled,
379/// exported to OpenTelemetry endpoints before the application exits. It:
380/// - Flushes the file appender buffer (happens automatically via `LOG_GUARD` drop)
381/// - Shuts down OpenTelemetry providers (when `otel` feature is enabled)
382/// - Ensures no log data is lost on exit
383///
384/// This function should be called once at application shutdown.
385///
386/// # Example
387///
388/// ```no_run
389/// use cadmus_core::device::CURRENT_DEVICE;
390/// use cadmus_core::logging::{init_logging, shutdown_logging};
391/// use cadmus_core::settings::LoggingSettings;
392///
393/// // At application start
394/// let settings = LoggingSettings::default();
395/// let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
396/// init_logging(&settings, log_dir)?;
397///
398/// // ... application runs ...
399///
400/// // At application exit
401/// shutdown_logging();
402/// # Ok::<(), anyhow::Error>(())
403/// ```
404pub fn shutdown_logging() {
405    if let Some(mutex) = LOG_GUARD.get() {
406        if let Ok(mut guard_opt) = mutex.lock() {
407            if let Some(guard) = guard_opt.take() {
408                let (tx, rx) = mpsc::channel();
409
410                thread::spawn(move || {
411                    drop(guard);
412                    let _ = tx.send(());
413                });
414
415                let _ = rx.recv_timeout(Duration::from_secs(5));
416                eprintln!("Logging shutdown complete.");
417            }
418        }
419    }
420
421    #[cfg(feature = "tracing")]
422    telemetry::tracing::shutdown_telemetry();
423}
424
425/// Redirects log output to `dir`, flushing the current file first.
426///
427/// Used to keep logging alive across USB share when /mnt/onboard is unmounted.
428///
429/// This function creates a new rolling file appender in the specified directory and updates the
430/// logging system to use it. The old appender is dropped, which flushes any buffered data to disk
431/// after the new appender is in place to avoid log loss.
432pub fn redirect_log_to_dir(dir: &Path, settings: &LoggingSettings) -> Result<(), Error> {
433    ensure!(
434        dir.is_absolute(),
435        "log_dir must be absolute, got {}",
436        dir.display()
437    );
438
439    let (Some(writer_swap), Some(guard_mutex)) = (WRITER_INNER.get(), LOG_GUARD.get()) else {
440        return Ok(());
441    };
442
443    fs::create_dir_all(dir)
444        .with_context(|| format!("can't create log directory {}", dir.display()))?;
445
446    let appender = tracing_appender::rolling::Builder::new()
447        .rotation(tracing_appender::rolling::Rotation::NEVER)
448        .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
449        .filename_suffix(LOG_FILE_SUFFIX)
450        .max_log_files(settings.max_files)
451        .build(dir)
452        .context("can't build log appender for redirect")?;
453
454    let (non_blocking, new_guard) = tracing_appender::non_blocking(appender);
455
456    writer_swap.store(Arc::new(non_blocking));
457
458    let old_guard = {
459        let mut guard_opt = guard_mutex
460            .lock()
461            .map_err(|e| anyhow::anyhow!("failed to get lock for guard during redirect: {e}"))?;
462        let old = guard_opt.take();
463        *guard_opt = Some(new_guard);
464        old
465    };
466
467    drop(old_guard);
468
469    Ok(())
470}
471
472/// Builds an `EnvFilter` from settings or environment variables.
473///
474/// The function checks for the `RUST_LOG` environment variable first, which
475/// overrides the `level` setting. If `RUST_LOG` is not set, it uses the
476/// level from `LoggingSettings` (defaulting to "info" if empty).
477///
478/// # Arguments
479///
480/// * `settings` - Logging settings containing the default level
481///
482/// # Returns
483///
484/// Returns a configured `EnvFilter` instance.
485///
486/// # Errors
487///
488/// Returns an error if the log level string cannot be parsed.
489///
490/// # Example Filter Syntax
491///
492/// ```bash
493/// # Global level
494/// RUST_LOG=debug
495///
496/// # Per-module levels
497/// RUST_LOG=cadmus::view=trace,info
498///
499/// # Complex filtering
500/// RUST_LOG=warn,cadmus::document=debug,cadmus::sync=trace
501/// ```
502fn build_filter(settings: &LoggingSettings) -> Result<EnvFilter, Error> {
503    if let Ok(filter) = EnvFilter::try_from_default_env() {
504        return Ok(filter);
505    }
506
507    let level = settings.level.trim();
508    let level = if level.is_empty() { "info" } else { level };
509
510    EnvFilter::builder()
511        .parse(level)
512        .context("invalid logging level")
513}
514
515#[cfg(test)]
516mod tests {
517    use super::*;
518    use std::sync::OnceLock;
519    use tempfile::TempDir;
520
521    /// Guard that ensures `init_logging` is called at most once per test binary.
522    ///
523    /// `init_logging` registers a global tracing subscriber via `try_init()`, which
524    /// panics (or returns an error) on a second call within the same process. All
525    /// tests that need the logging statics populated must go through this helper.
526    static LOGGING_INIT: OnceLock<TempDir> = OnceLock::new();
527
528    /// Initialise logging once for the whole test binary and return the log dir.
529    ///
530    /// Subsequent calls return the already-initialised directory, so the test can
531    /// be run together with other tests without conflicts.
532    fn ensure_logging_init() -> &'static std::path::Path {
533        LOGGING_INIT
534            .get_or_init(|| {
535                let dir = TempDir::new().expect("failed to create temp dir for logging init");
536                let settings = LoggingSettings {
537                    enabled: true,
538                    level: "info".to_string(),
539                    max_files: 5,
540                    directory: dir.path().to_path_buf(),
541                    otlp_endpoint: None,
542                    pyroscope_endpoint: None,
543                    enable_kern_log: false,
544                    enable_dbus_log: false,
545                };
546                init_logging(&settings, dir.path().to_path_buf())
547                    .expect("failed to initialize logging for tests");
548                dir
549            })
550            .path()
551    }
552
553    fn create_log_file(dir: &std::path::Path, index: usize) -> Result<(), Error> {
554        let file_name = format!("{}{:04}.{}", LOG_FILE_PREFIX, index, LOG_FILE_SUFFIX);
555        fs::write(dir.join(file_name), b"{}")?;
556        Ok(())
557    }
558
559    fn collect_log_file_names(dir: &std::path::Path) -> Result<Vec<String>, Error> {
560        let mut entries = collect_run_log_entries(dir)?;
561        entries.sort_by_key(|entry| entry.file_name());
562        Ok(entries
563            .into_iter()
564            .map(|entry| entry.file_name().to_string_lossy().into_owned())
565            .collect())
566    }
567
568    #[test]
569    fn test_cleanup_run_logs_removes_oldest_entries() -> Result<(), Error> {
570        let temp_dir = TempDir::new()?;
571        for index in 1..=5 {
572            create_log_file(temp_dir.path(), index)?;
573        }
574
575        cleanup_run_logs(temp_dir.path(), 3)?;
576
577        let remaining = collect_log_file_names(temp_dir.path())?;
578        assert_eq!(remaining.len(), 3);
579        assert_eq!(
580            remaining,
581            vec![
582                format!("{}0003.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
583                format!("{}0004.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
584                format!("{}0005.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
585            ]
586        );
587
588        Ok(())
589    }
590
591    #[test]
592    fn test_cleanup_run_logs_max_files_zero_keeps_all() -> Result<(), Error> {
593        let temp_dir = TempDir::new()?;
594        for index in 1..=3 {
595            create_log_file(temp_dir.path(), index)?;
596        }
597
598        cleanup_run_logs(temp_dir.path(), 0)?;
599
600        let remaining = collect_log_file_names(temp_dir.path())?;
601        assert_eq!(remaining.len(), 3);
602
603        Ok(())
604    }
605
606    /// Verify that `redirect_log_to_dir` creates a log file in the new target
607    /// directory and that the file name matches the expected `cadmus-<run_id>.json`
608    /// pattern.
609    ///
610    /// # Why this test needs `init_logging`
611    ///
612    /// `redirect_log_to_dir` is a no-op when the global `WRITER_INNER` / `LOG_GUARD`
613    /// statics are unset. Those statics are populated only by `init_logging`, so we
614    /// must call it (once, via `ensure_logging_init`) before exercising the redirect
615    /// path. The `ensure_logging_init` helper uses a `OnceLock` so that the global
616    /// tracing subscriber is registered at most once per test binary, avoiding the
617    /// "subscriber already set" error that `try_init()` would otherwise produce.
618    #[test]
619    fn test_redirect_log_to_dir_creates_log_file_in_new_dir() -> Result<(), Error> {
620        ensure_logging_init();
621
622        let redirect_dir = TempDir::new()?;
623
624        let settings = LoggingSettings {
625            enabled: true,
626            level: "info".to_string(),
627            max_files: 5,
628            directory: redirect_dir.path().to_path_buf(),
629            otlp_endpoint: None,
630            pyroscope_endpoint: None,
631            enable_kern_log: false,
632            enable_dbus_log: false,
633        };
634
635        redirect_log_to_dir(redirect_dir.path(), &settings)?;
636
637        // After redirect a non-blocking appender is set up for `redirect_dir`.
638        // The underlying file is created lazily on the first write; emit one log
639        // event so the file is flushed to disk before we inspect the directory.
640        tracing::info!("test redirect log event");
641
642        let expected_file_name = format!("{}{}.{}", LOG_FILE_PREFIX, get_run_id(), LOG_FILE_SUFFIX);
643
644        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
645        #[allow(unused_assignments)]
646        let mut log_files = Vec::new();
647
648        loop {
649            log_files = collect_log_file_names(redirect_dir.path())?;
650
651            if !log_files.is_empty() && log_files.iter().any(|name| name == &expected_file_name) {
652                break;
653            }
654
655            if std::time::Instant::now() >= deadline {
656                break;
657            }
658
659            std::thread::sleep(std::time::Duration::from_millis(50));
660        }
661
662        assert!(
663            !log_files.is_empty(),
664            "expected at least one log file in the redirect directory, but found none"
665        );
666
667        assert!(
668            log_files.iter().any(|name| name == &expected_file_name),
669            "expected a log file named '{}' in redirect dir, but found: {:?}",
670            expected_file_name,
671            log_files,
672        );
673
674        Ok(())
675    }
676}