cadmus_core/logging.rs
1//! Structured logging infrastructure with JSON output and OpenTelemetry integration.
2//!
3//! This module provides logging functionality for Cadmus, including:
4//! - JSON-structured logs written to rotating files
5//! - Configurable log levels and filtering
6//! - Automatic log file cleanup based on retention policies
7//! - Optional OpenTelemetry export (when `otel` feature is enabled)
8//! - Unique run ID for correlating logs across a session
9//!
10//! # Architecture
11//!
12//! The logging system is built on the `tracing` crate ecosystem:
13//! - `tracing_subscriber` for composable logging layers
14//! - `tracing_appender` for non-blocking file I/O
15//! - JSON formatting for structured, machine-readable logs
16//! - `EnvFilter` for flexible log level control
17//!
18//! Each application run generates a unique Run ID (UUID v7) that appears in:
19//! - The log filename: `cadmus-<run_id>.json`
20//! - OpenTelemetry resource attributes
21//! - All log entries for correlation
22//!
23//! # Log File Management
24//!
25//! Log files are automatically managed:
26//! - Files are named with the run ID: `cadmus-<run_id>.json`
27//! - Older files are deleted when `max_files` limit is exceeded
28//! - Cleanup happens at initialization, keeping only the most recent files
29//!
30//! # Configuration
31//!
32//! Logging is configured via `LoggingSettings`:
33//!
34//! ```toml
35//! [logging]
36//! enabled = true
37//! level = "info"
38//! max-files = 3
39//! directory = "logs"
40//! otlp-endpoint = "http://localhost:4318" # Optional
41//! ```
42//!
43//! The log level can be overridden with the `RUST_LOG` environment variable:
44//!
45//! ```bash
46//! RUST_LOG=debug ./cadmus
47//! RUST_LOG=cadmus::view=trace,info ./cadmus
48//! ```
49//!
50//! # Example Usage
51//!
52//! ```no_run
53//! use cadmus_core::device::CURRENT_DEVICE;
54//! use cadmus_core::settings::LoggingSettings;
55//! use cadmus_core::logging::{init_logging, shutdown_logging, get_run_id};
56//!
57//! let settings = LoggingSettings {
58//! enabled: true,
59//! level: "info".to_string(),
60//! max_files: 3,
61//! directory: "logs".into(),
62//! otlp_endpoint: None,
63//! pyroscope_endpoint: None,
64//! enable_kern_log: false,
65//! enable_dbus_log: false,
66//! };
67//!
68//! // Initialize at application startup
69//! let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
70//! init_logging(&settings, log_dir)?;
71//! eprintln!("Started with run ID: {}", get_run_id());
72//!
73//! // Use tracing macros throughout the application
74//! tracing::info!("Application started");
75//!
76//! // Shutdown at application exit (flushes buffers)
77//! shutdown_logging();
78//! # Ok::<(), anyhow::Error>(())
79//! ```
80
81use crate::settings::LoggingSettings;
82#[cfg(feature = "tracing")]
83use crate::telemetry;
84use crate::version::get_current_version;
85use anyhow::{Context, Error, ensure};
86use arc_swap::ArcSwap;
87use std::fs;
88use std::fs::DirEntry;
89use std::path::Path;
90use std::sync::mpsc;
91use std::sync::{Arc, Mutex, OnceLock};
92use std::thread;
93use std::time::Duration;
94use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
95use tracing_subscriber::EnvFilter;
96use tracing_subscriber::prelude::*;
97use uuid::Uuid;
98
99mod kern;
100
101const LOG_FILE_PREFIX: &str = "cadmus-";
102const LOG_FILE_SUFFIX: &str = "json";
103
104static LOG_GUARD: OnceLock<Mutex<Option<WorkerGuard>>> = OnceLock::new();
105static RUN_ID: OnceLock<String> = OnceLock::new();
106static WRITER_INNER: OnceLock<ArcSwap<NonBlocking>> = OnceLock::new();
107
108struct SwappableWriter {
109 inner: &'static ArcSwap<NonBlocking>,
110}
111
112impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SwappableWriter {
113 type Writer = NonBlocking;
114
115 /// Performs a lock-free atomic load of the current writer.
116 fn make_writer(&'a self) -> Self::Writer {
117 self.inner.load().as_ref().clone()
118 }
119}
120
121/// Returns the unique run ID for this application session.
122///
123/// The run ID is a UUID v7 generated at first access and remains constant
124/// for the lifetime of the process. It is used to:
125/// - Name the log file: `cadmus-<run_id>.json`
126/// - Tag OpenTelemetry telemetry exports
127/// - Correlate all operations within a single run
128///
129/// # Returns
130///
131/// A string slice containing the run ID, valid for the program's lifetime.
132///
133/// # Example
134///
135/// ```
136/// use cadmus_core::logging::get_run_id;
137///
138/// let run_id = get_run_id();
139/// eprintln!("Application run ID: {}", run_id);
140/// assert_eq!(get_run_id(), run_id); // Consistent across calls
141/// ```
142pub fn get_run_id() -> &'static str {
143 RUN_ID.get_or_init(|| Uuid::now_v7().to_string()).as_str()
144}
145
146/// Removes old log files to maintain the configured retention limit.
147///
148/// This function scans the log directory for files matching the pattern
149/// `cadmus-*.json` and deletes the oldest files if the count exceeds `max_files`.
150///
151/// Note: this relies on the run ID being a UUID v7 (time-ordered). Filenames are
152/// `cadmus-<run_id>.json` where `<run_id>` is generated with `Uuid::now_v7()`,
153/// so lexicographic sorting of the filenames corresponds to chronological order.
154/// Sorting by file name therefore yields oldest-first ordering for removal.
155///
156/// # Arguments
157///
158/// * `log_dir` - Path to the directory containing log files
159/// * `max_files` - Maximum number of log files to retain (0 = keep all)
160///
161/// # Returns
162///
163/// Returns `Ok(())` on success.
164///
165/// # Errors
166///
167/// Returns an error if:
168/// - The log directory cannot be read
169/// - Individual directory entries cannot be read
170/// - Old log files cannot be deleted
171fn cleanup_run_logs(log_dir: &std::path::Path, max_files: usize) -> Result<(), Error> {
172 if max_files == 0 {
173 return Ok(());
174 }
175
176 let mut entries = collect_run_log_entries(log_dir)?;
177 if entries.len() <= max_files {
178 return Ok(());
179 }
180
181 entries.sort_by_key(|entry| entry.file_name());
182 let remove_count = entries.len().saturating_sub(max_files);
183 for entry in entries.into_iter().take(remove_count) {
184 fs::remove_file(entry.path())
185 .with_context(|| format!("can't remove old log file {}", entry.path().display()))?;
186 }
187
188 Ok(())
189}
190
191/// Collects all Cadmus log file entries from the specified directory.
192///
193/// Only files matching the pattern `cadmus-*.json` are collected.
194///
195/// # Arguments
196///
197/// * `log_dir` - Path to the directory to scan
198///
199/// # Returns
200///
201/// Returns a vector of directory entries representing log files.
202///
203/// # Errors
204///
205/// Returns an error if the directory cannot be read or entries are inaccessible.
206fn collect_run_log_entries(log_dir: &std::path::Path) -> Result<Vec<DirEntry>, Error> {
207 let mut entries = Vec::new();
208 for entry in fs::read_dir(log_dir)
209 .with_context(|| format!("can't read log directory {}", log_dir.display()))?
210 {
211 let entry = entry.context("can't read log directory entry")?;
212 if is_run_log_entry(&entry) {
213 entries.push(entry);
214 }
215 }
216
217 Ok(entries)
218}
219
220/// Determines whether a directory entry is a Cadmus log file.
221///
222/// Returns `true` if the filename starts with `cadmus-` and ends with `.json`.
223///
224/// # Arguments
225///
226/// * `entry` - Directory entry to check
227///
228/// # Returns
229///
230/// `true` if the entry is a log file, `false` otherwise.
231fn is_run_log_entry(entry: &DirEntry) -> bool {
232 let file_name = entry.file_name();
233 let file_name = file_name.to_string_lossy();
234 if !file_name.starts_with(LOG_FILE_PREFIX) {
235 return false;
236 }
237
238 file_name.ends_with(LOG_FILE_SUFFIX)
239}
240
241/// Initializes the logging system with JSON output and optional OpenTelemetry export.
242///
243/// This function sets up the complete logging infrastructure:
244/// - Creates the log directory if it doesn't exist
245/// - Cleans up old log files based on retention policy
246/// - Configures a rolling file appender with non-blocking I/O
247/// - Applies log level filtering from settings or environment
248/// - Sets up JSON formatting for structured logs
249/// - Initializes tracing export if the `tracing` feature is enabled
250/// - Bridges `log::` records (e.g. from pyroscope-rs) into the tracing pipeline
251/// so they appear in Loki alongside tracing events via the tracing-log
252/// layer automatically enabled by tracing-subscriber.
253///
254/// The function should only be called once at application startup.
255/// The logging system remains active until `shutdown_logging()` is called.
256///
257/// # Arguments
258///
259/// * `settings` - Logging configuration including level, directory name, and
260/// retention settings.
261/// * `log_dir` - Absolute path to the directory where log files are written.
262/// The caller is responsible for computing this from
263/// [`Device::data_path`](crate::device::Device::data_path) so that logs land
264/// on the SD card when one is present. Pass
265/// `CURRENT_DEVICE.data_path(&settings.directory)` at call sites.
266///
267/// # Returns
268///
269/// Returns `Ok(())` on successful initialization.
270///
271/// # Errors
272///
273/// Returns an error if:
274/// - The log directory cannot be created
275/// - Log file cleanup fails
276/// - The rolling file appender cannot be initialized
277/// - The tracing subscriber cannot be initialized
278/// - OpenTelemetry initialization fails (when `otel` feature is enabled)
279///
280/// # Example
281///
282/// ```no_run
283/// use cadmus_core::device::CURRENT_DEVICE;
284/// use cadmus_core::settings::LoggingSettings;
285/// use cadmus_core::logging::init_logging;
286///
287/// let settings = LoggingSettings {
288/// enabled: true,
289/// level: "debug".to_string(),
290/// max_files: 5,
291/// directory: "logs".into(),
292/// otlp_endpoint: Some("http://localhost:4318".to_string()),
293/// pyroscope_endpoint: None,
294/// enable_kern_log: false,
295/// enable_dbus_log: false,
296/// };
297///
298/// let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
299/// init_logging(&settings, log_dir)?;
300/// # Ok::<(), anyhow::Error>(())
301/// ```
302pub fn init_logging(settings: &LoggingSettings, log_dir: std::path::PathBuf) -> Result<(), Error> {
303 if !settings.enabled {
304 return Ok(());
305 }
306
307 ensure!(
308 log_dir.is_absolute(),
309 "log_dir must be absolute, got {}",
310 log_dir.display()
311 );
312
313 fs::create_dir_all(&log_dir)
314 .with_context(|| format!("can't create log directory {}", &log_dir.display()))?;
315
316 cleanup_run_logs(&log_dir, settings.max_files)?;
317
318 let appender = tracing_appender::rolling::Builder::new()
319 .rotation(tracing_appender::rolling::Rotation::NEVER)
320 .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
321 .filename_suffix(LOG_FILE_SUFFIX)
322 .max_log_files(settings.max_files)
323 .build(&log_dir)
324 .context("can't initialize rolling log file appender")?;
325
326 let (non_blocking, guard) = tracing_appender::non_blocking(appender);
327 let _ = LOG_GUARD.set(Mutex::new(Some(guard)));
328 let _ = WRITER_INNER.set(ArcSwap::new(Arc::new(non_blocking)));
329
330 let swappable = SwappableWriter {
331 inner: WRITER_INNER.get().expect("WRITER_INNER just set"),
332 };
333
334 let filter = build_filter(settings)?;
335
336 let fmt_layer = tracing_subscriber::fmt::layer()
337 .json()
338 .with_ansi(false)
339 .with_writer(swappable)
340 .with_current_span(true);
341
342 cfg_select! {
343 feature = "tracing" => {
344 let subscriber = tracing_subscriber::registry()
345 .with(filter)
346 .with(telemetry::tracing::init_telemetry(settings, get_run_id())?)
347 .with(fmt_layer);
348
349 subscriber
350 .try_init()
351 .context("can't initialize tracing subscriber")?;
352 }
353 _ => {
354 let subscriber = tracing_subscriber::registry().with(filter).with(fmt_layer);
355
356 subscriber
357 .try_init()
358 .context("can't initialize tracing subscriber")?;
359 }
360 }
361
362 eprintln!(
363 "Cadmus run started with ID: {} (version {})",
364 get_run_id(),
365 get_current_version()
366 );
367
368 #[cfg(feature = "test")]
369 if settings.enable_kern_log {
370 kern::spawn_kern_log_thread();
371 }
372
373 Ok(())
374}
375
376/// Gracefully shuts down the logging system and flushes buffered data.
377///
378/// This function ensures all buffered log data is written to disk and, if enabled,
379/// exported to OpenTelemetry endpoints before the application exits. It:
380/// - Flushes the file appender buffer (happens automatically via `LOG_GUARD` drop)
381/// - Shuts down OpenTelemetry providers (when `otel` feature is enabled)
382/// - Ensures no log data is lost on exit
383///
384/// This function should be called once at application shutdown.
385///
386/// # Example
387///
388/// ```no_run
389/// use cadmus_core::device::CURRENT_DEVICE;
390/// use cadmus_core::logging::{init_logging, shutdown_logging};
391/// use cadmus_core::settings::LoggingSettings;
392///
393/// // At application start
394/// let settings = LoggingSettings::default();
395/// let log_dir = CURRENT_DEVICE.data_path(&settings.directory);
396/// init_logging(&settings, log_dir)?;
397///
398/// // ... application runs ...
399///
400/// // At application exit
401/// shutdown_logging();
402/// # Ok::<(), anyhow::Error>(())
403/// ```
404pub fn shutdown_logging() {
405 if let Some(mutex) = LOG_GUARD.get() {
406 if let Ok(mut guard_opt) = mutex.lock() {
407 if let Some(guard) = guard_opt.take() {
408 let (tx, rx) = mpsc::channel();
409
410 thread::spawn(move || {
411 drop(guard);
412 let _ = tx.send(());
413 });
414
415 let _ = rx.recv_timeout(Duration::from_secs(5));
416 eprintln!("Logging shutdown complete.");
417 }
418 }
419 }
420
421 #[cfg(feature = "tracing")]
422 telemetry::tracing::shutdown_telemetry();
423}
424
425/// Redirects log output to `dir`, flushing the current file first.
426///
427/// Used to keep logging alive across USB share when /mnt/onboard is unmounted.
428///
429/// This function creates a new rolling file appender in the specified directory and updates the
430/// logging system to use it. The old appender is dropped, which flushes any buffered data to disk
431/// after the new appender is in place to avoid log loss.
432pub fn redirect_log_to_dir(dir: &Path, settings: &LoggingSettings) -> Result<(), Error> {
433 ensure!(
434 dir.is_absolute(),
435 "log_dir must be absolute, got {}",
436 dir.display()
437 );
438
439 let (Some(writer_swap), Some(guard_mutex)) = (WRITER_INNER.get(), LOG_GUARD.get()) else {
440 return Ok(());
441 };
442
443 fs::create_dir_all(dir)
444 .with_context(|| format!("can't create log directory {}", dir.display()))?;
445
446 let appender = tracing_appender::rolling::Builder::new()
447 .rotation(tracing_appender::rolling::Rotation::NEVER)
448 .filename_prefix(format!("{}{}", LOG_FILE_PREFIX, get_run_id()))
449 .filename_suffix(LOG_FILE_SUFFIX)
450 .max_log_files(settings.max_files)
451 .build(dir)
452 .context("can't build log appender for redirect")?;
453
454 let (non_blocking, new_guard) = tracing_appender::non_blocking(appender);
455
456 writer_swap.store(Arc::new(non_blocking));
457
458 let old_guard = {
459 let mut guard_opt = guard_mutex
460 .lock()
461 .map_err(|e| anyhow::anyhow!("failed to get lock for guard during redirect: {e}"))?;
462 let old = guard_opt.take();
463 *guard_opt = Some(new_guard);
464 old
465 };
466
467 drop(old_guard);
468
469 Ok(())
470}
471
472/// Builds an `EnvFilter` from settings or environment variables.
473///
474/// The function checks for the `RUST_LOG` environment variable first, which
475/// overrides the `level` setting. If `RUST_LOG` is not set, it uses the
476/// level from `LoggingSettings` (defaulting to "info" if empty).
477///
478/// # Arguments
479///
480/// * `settings` - Logging settings containing the default level
481///
482/// # Returns
483///
484/// Returns a configured `EnvFilter` instance.
485///
486/// # Errors
487///
488/// Returns an error if the log level string cannot be parsed.
489///
490/// # Example Filter Syntax
491///
492/// ```bash
493/// # Global level
494/// RUST_LOG=debug
495///
496/// # Per-module levels
497/// RUST_LOG=cadmus::view=trace,info
498///
499/// # Complex filtering
500/// RUST_LOG=warn,cadmus::document=debug,cadmus::sync=trace
501/// ```
502fn build_filter(settings: &LoggingSettings) -> Result<EnvFilter, Error> {
503 if let Ok(filter) = EnvFilter::try_from_default_env() {
504 return Ok(filter);
505 }
506
507 let level = settings.level.trim();
508 let level = if level.is_empty() { "info" } else { level };
509
510 EnvFilter::builder()
511 .parse(level)
512 .context("invalid logging level")
513}
514
515#[cfg(test)]
516mod tests {
517 use super::*;
518 use std::sync::OnceLock;
519 use tempfile::TempDir;
520
521 /// Guard that ensures `init_logging` is called at most once per test binary.
522 ///
523 /// `init_logging` registers a global tracing subscriber via `try_init()`, which
524 /// panics (or returns an error) on a second call within the same process. All
525 /// tests that need the logging statics populated must go through this helper.
526 static LOGGING_INIT: OnceLock<TempDir> = OnceLock::new();
527
528 /// Initialise logging once for the whole test binary and return the log dir.
529 ///
530 /// Subsequent calls return the already-initialised directory, so the test can
531 /// be run together with other tests without conflicts.
532 fn ensure_logging_init() -> &'static std::path::Path {
533 LOGGING_INIT
534 .get_or_init(|| {
535 let dir = TempDir::new().expect("failed to create temp dir for logging init");
536 let settings = LoggingSettings {
537 enabled: true,
538 level: "info".to_string(),
539 max_files: 5,
540 directory: dir.path().to_path_buf(),
541 otlp_endpoint: None,
542 pyroscope_endpoint: None,
543 enable_kern_log: false,
544 enable_dbus_log: false,
545 };
546 init_logging(&settings, dir.path().to_path_buf())
547 .expect("failed to initialize logging for tests");
548 dir
549 })
550 .path()
551 }
552
553 fn create_log_file(dir: &std::path::Path, index: usize) -> Result<(), Error> {
554 let file_name = format!("{}{:04}.{}", LOG_FILE_PREFIX, index, LOG_FILE_SUFFIX);
555 fs::write(dir.join(file_name), b"{}")?;
556 Ok(())
557 }
558
559 fn collect_log_file_names(dir: &std::path::Path) -> Result<Vec<String>, Error> {
560 let mut entries = collect_run_log_entries(dir)?;
561 entries.sort_by_key(|entry| entry.file_name());
562 Ok(entries
563 .into_iter()
564 .map(|entry| entry.file_name().to_string_lossy().into_owned())
565 .collect())
566 }
567
568 #[test]
569 fn test_cleanup_run_logs_removes_oldest_entries() -> Result<(), Error> {
570 let temp_dir = TempDir::new()?;
571 for index in 1..=5 {
572 create_log_file(temp_dir.path(), index)?;
573 }
574
575 cleanup_run_logs(temp_dir.path(), 3)?;
576
577 let remaining = collect_log_file_names(temp_dir.path())?;
578 assert_eq!(remaining.len(), 3);
579 assert_eq!(
580 remaining,
581 vec![
582 format!("{}0003.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
583 format!("{}0004.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
584 format!("{}0005.{}", LOG_FILE_PREFIX, LOG_FILE_SUFFIX),
585 ]
586 );
587
588 Ok(())
589 }
590
591 #[test]
592 fn test_cleanup_run_logs_max_files_zero_keeps_all() -> Result<(), Error> {
593 let temp_dir = TempDir::new()?;
594 for index in 1..=3 {
595 create_log_file(temp_dir.path(), index)?;
596 }
597
598 cleanup_run_logs(temp_dir.path(), 0)?;
599
600 let remaining = collect_log_file_names(temp_dir.path())?;
601 assert_eq!(remaining.len(), 3);
602
603 Ok(())
604 }
605
606 /// Verify that `redirect_log_to_dir` creates a log file in the new target
607 /// directory and that the file name matches the expected `cadmus-<run_id>.json`
608 /// pattern.
609 ///
610 /// # Why this test needs `init_logging`
611 ///
612 /// `redirect_log_to_dir` is a no-op when the global `WRITER_INNER` / `LOG_GUARD`
613 /// statics are unset. Those statics are populated only by `init_logging`, so we
614 /// must call it (once, via `ensure_logging_init`) before exercising the redirect
615 /// path. The `ensure_logging_init` helper uses a `OnceLock` so that the global
616 /// tracing subscriber is registered at most once per test binary, avoiding the
617 /// "subscriber already set" error that `try_init()` would otherwise produce.
618 #[test]
619 fn test_redirect_log_to_dir_creates_log_file_in_new_dir() -> Result<(), Error> {
620 ensure_logging_init();
621
622 let redirect_dir = TempDir::new()?;
623
624 let settings = LoggingSettings {
625 enabled: true,
626 level: "info".to_string(),
627 max_files: 5,
628 directory: redirect_dir.path().to_path_buf(),
629 otlp_endpoint: None,
630 pyroscope_endpoint: None,
631 enable_kern_log: false,
632 enable_dbus_log: false,
633 };
634
635 redirect_log_to_dir(redirect_dir.path(), &settings)?;
636
637 // After redirect a non-blocking appender is set up for `redirect_dir`.
638 // The underlying file is created lazily on the first write; emit one log
639 // event so the file is flushed to disk before we inspect the directory.
640 tracing::info!("test redirect log event");
641
642 let expected_file_name = format!("{}{}.{}", LOG_FILE_PREFIX, get_run_id(), LOG_FILE_SUFFIX);
643
644 let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
645 #[allow(unused_assignments)]
646 let mut log_files = Vec::new();
647
648 loop {
649 log_files = collect_log_file_names(redirect_dir.path())?;
650
651 if !log_files.is_empty() && log_files.iter().any(|name| name == &expected_file_name) {
652 break;
653 }
654
655 if std::time::Instant::now() >= deadline {
656 break;
657 }
658
659 std::thread::sleep(std::time::Duration::from_millis(50));
660 }
661
662 assert!(
663 !log_files.is_empty(),
664 "expected at least one log file in the redirect directory, but found none"
665 );
666
667 assert!(
668 log_files.iter().any(|name| name == &expected_file_name),
669 "expected a log file named '{}' in redirect dir, but found: {:?}",
670 expected_file_name,
671 log_files,
672 );
673
674 Ok(())
675 }
676}