diff options
author | Stefan Kreutz <mail@skreutz.com> | 2024-03-24 15:04:09 +0100 |
---|---|---|
committer | Stefan Kreutz <mail@skreutz.com> | 2024-03-24 15:04:09 +0100 |
commit | c1fa48e9bd617d70e823efef5d6dcea41b1d2087 (patch) | |
tree | 421e69c512ac54bf65495ef23fd7d9ec5a5e67d5 /src/main.rs | |
download | brck-0.1.0.tar |
Add initial implementationbrck-0.1.0
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 343 |
1 files changed, 343 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..2d26eed --- /dev/null +++ b/src/main.rs @@ -0,0 +1,343 @@ +#![forbid(unsafe_code)] + +use std::{ + collections::HashMap, + num::{NonZeroU64, NonZeroUsize}, + path::PathBuf, + sync::{atomic::AtomicBool, Arc}, +}; + +use anyhow::{anyhow, Context}; +use clap::Parser; +use either::Either; +use flate2::{write::GzEncoder, Compression}; +use itertools::Itertools; +use parseq::ParallelIterator; + +mod core; +use core::*; + +mod fs; +use fs::*; + +// std::process::ExitCode::exit_process is unstable +// Exit codes should fit u8. +const EXIT_SUCCESS: i32 = 0; +const EXIT_FAILURE: i32 = 1; +const EXIT_INTERRUPT: i32 = 130; + +/// A simple bit rot checker for legacy file systems. +/// +/// On the first invocation, Brck records the modification time and a cryptographic hash sum of each regular file in the current working directory to a .brck file. +/// On subsequent invocations, Brck compares the recorded files against the current file system. +/// +/// A file can be added, touched, changed, unchanged, removed, or corrupted. +/// A file is corrupted if it's content changed but it's modification did not. +/// +/// Without any options, Brck denies corrupted files. +/// If Brck finds one or more denied differences, it prints them to standard output and exits >0. +/// Otherwise, Brck updates the .brck file. +#[derive(Debug, Parser)] +#[command(about, version, author)] +struct Args { + /// Increase verbosity. + /// + /// Specify once to print all changed files. + /// Specify twice to print unchanged files, too. + /// By default, only denied differences are printed. + #[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count)] + verbosity: u8, + + /// Do not actually modify the database. + #[arg(short = 'n', long)] + dry_run: bool, + + /// Specify the number of worker threads. + /// + /// Defaults to the available parallelism, typically the number of CPUs. + #[arg(short, long)] + jobs: Option<NonZeroUsize>, + + /// Deny select differences only. + /// + /// This option accepts a comma-separated list of differences to deny. + /// Without this option, only corrupted files are denied. + /// With this option, only the specified differences are denied. + #[arg(value_enum, short, long, default_values_t = vec![DiffKind::Corrupted], value_delimiter = ',', action = clap::ArgAction::Append)] + deny: Vec<DiffKind>, + + /// Enable structured output. + #[arg(short = 'J', long)] + json: bool, + + /// Print summary to standard error. + #[arg(short, long)] + summary: bool, + + /// Specify the maximum number of bytes a worker thread may read without checking for an interrupt signal. + /// + /// Defaults to 1 GiB. + #[arg(long)] + chunk_size: Option<NonZeroU64>, + + /// Specify the maximum number of files to read ahead. + /// + /// This should be greater than the number of worker threads. + /// + /// Defaults to 1024 files per worker thread. + #[arg(long)] + queue_size: Option<NonZeroUsize>, +} + +fn main() { + std::process::exit(match run() { + Ok(_) => EXIT_SUCCESS, + Err(err) if err.is::<clap::Error>() => { + // Use clap's own error formatter + eprintln!("{err}"); + EXIT_FAILURE + } + Err(err) => { + eprintln!("Error: {:?}", err); + match err.downcast_ref::<RecordError>() { + Some(RecordError::Interrupt) => EXIT_INTERRUPT, + _ => EXIT_FAILURE, + } + } + }) +} + +fn run() -> Result<(), anyhow::Error> { + let terminate = Arc::new(AtomicBool::new(false)); + let args = Args::try_parse()?; + + // Paths must begin with ./ for correct filtering. + let current_dir: PathBuf = std::path::Component::CurDir.as_os_str().into(); + let db_path = current_dir.join(".brck"); + let backup_path = current_dir.join(".brck.bak"); + + // Creating temporary files in the same directory instead of std::env_temp_dir() to enable atomic rename(2) on POSIX-conform systems. + // The drawback is that the operating system won't remove these files automatically. + let tmp_db_path = current_dir.join(".brck.tmp"); + let tmp_backup_path = current_dir.join(".brck.bak.tmp"); + + #[cfg(target_os = "openbsd")] + { + use std::{os::unix::ffi::OsStrExt, path::Path}; + + pledge::pledge![Stdio Rpath Wpath Cpath Unveil, Stdio].context("Failed to pledge(2)")?; + + fn unveil<P: AsRef<Path>>(path: P, permissions: &str) -> Result<(), anyhow::Error> { + unveil::unveil(path.as_ref().as_os_str().as_bytes(), permissions).with_context(|| { + format!( + "Failed to unveil(2) {} with permissions {}", + path.as_ref().display(), + permissions + ) + }) + } + + unveil(".", "r")?; + unveil(&db_path, "rwc")?; + unveil(&tmp_db_path, "rwc")?; + unveil(&backup_path, "rwc")?; + unveil(&tmp_backup_path, "rwc")?; + unveil::unveil("", "").context("Failed to disable unveil(2)")?; + } + + let t = terminate.clone(); + ctrlc::try_set_handler(move || { + if t.load(std::sync::atomic::Ordering::SeqCst) { + eprintln!("Received second interrupt signal"); + std::process::exit(EXIT_INTERRUPT); + } + eprintln!("Received interrupt signal"); + t.store(true, std::sync::atomic::Ordering::SeqCst); + }) + .context("Failed to register interrupt signal handler")?; + + let (db, first_run) = match read_db(&db_path) { + Ok(db) => Ok((Either::Left(db), false)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + Ok((Either::Right(std::iter::empty()), true)) + } + Err(err) => Err(err), + } + .with_context(|| format!("Failed to read database {}", db_path.display()))?; + let db = db.map(|item| { + item.map_err(|err| { + anyhow::Error::new(err).context(format!( + "Failed to decode record from database {}", + &db_path.display() + )) + }) + }); + + let tmp = TmpFile::open(&tmp_db_path).with_context(|| { + format!( + "Failed to create temporary database {}", + tmp_db_path.display() + ) + })?; + let mut tmp = GzEncoder::new(tmp.file(), Compression::default()); + + let jobs = args + .jobs + .or_else(|| std::thread::available_parallelism().ok()) + .map(NonZeroUsize::get) + .unwrap_or(1); + let queue_size = args + .queue_size + .map(NonZeroUsize::get) + .unwrap_or_else(|| jobs.saturating_mul(1024)); + let chunk_size = args + .chunk_size + .map(NonZeroU64::get) + .unwrap_or(1024 * 1024 * 1024); + + let fs = find_files(¤t_dir) + .filter_ok(|path| path != &db_path && path != &tmp_db_path && path != &backup_path) + .map(|item| { + item.map_err(|err| anyhow::Error::new(err).context("Failed to walk filesystem")) + }) + .map_parallel_limit(jobs, queue_size, move |item| { + item.and_then(|path| { + Record::from_path(&path, chunk_size, terminate.clone()) + .with_context(|| format!("Failed to read file {}", path.display())) + }) + }); + + let mut counter = HashMap::new(); + let mut denied = 0; + + diff(db, fs).try_for_each(|item| match item { + Err(Either::Left(err)) => Err(err), + Err(Either::Right(err)) => Err(err), + Ok(diff) => { + *counter.entry(diff.kind()).or_insert(0) += 1; + record(&diff, &mut tmp).with_context(|| { + format!( + "Failed to write to temporary database {}", + tmp_db_path.display() + ) + })?; + if args.deny.contains(&diff.kind()) { + if args.json { + println!("{}", serde_json::to_string(&diff).unwrap()); + } else { + println!("{diff}"); + } + denied += 1; + } else if args.verbosity > 1 + || (diff.kind() != DiffKind::Unchanged && args.verbosity > 0) + { + if args.json { + println!("{}", serde_json::to_string(&diff).unwrap()); + } else { + println!("{diff}"); + } + } + Ok(()) + } + })?; + + if args.summary { + let total: usize = counter.values().sum(); + let width = usize::try_from(total.checked_ilog10().unwrap_or(0) + 1).unwrap_or(0); + eprintln!( + "Removed: {:>width$}", + counter.get(&DiffKind::Removed).unwrap_or(&0) + ); + eprintln!( + "Added: {:>width$}", + counter.get(&DiffKind::Added).unwrap_or(&0) + ); + eprintln!( + "Unchanged: {:>width$}", + counter.get(&DiffKind::Unchanged).unwrap_or(&0) + ); + eprintln!( + "Corrupted: {:>width$}", + counter.get(&DiffKind::Corrupted).unwrap_or(&0) + ); + eprintln!( + "Touched: {:>width$}", + counter.get(&DiffKind::Touched).unwrap_or(&0) + ); + eprintln!( + "Changed: {:>width$}", + counter.get(&DiffKind::Changed).unwrap_or(&0) + ); + eprintln!("Total: {:>width$}", total); + } + + if denied > 0 { + return Err(anyhow!( + "Found {} denied {}", + denied, + if denied == 1 { + "difference" + } else { + "differences" + } + )); + } + + if args.dry_run { + eprintln!("Exiting due to dry run"); + return Ok(()); + } + + tmp.finish().with_context(|| { + format!( + "Failed to synchronize temporary database {}", + tmp_db_path.display() + ) + })?; + if !first_run { + copy_file(&db_path, tmp_backup_path, &backup_path).with_context(|| { + format!( + "Failed to backup database {} to {}", + db_path.display(), + backup_path.display() + ) + })?; + } + rename_file(&tmp_db_path, &db_path).with_context(|| { + format!( + "Failed to persist temporary database {} to {}", + tmp_db_path.display(), + db_path.display() + ) + })?; + + Ok(()) +} + +fn record<T: std::io::Write>(diff: &Diff, mut writer: T) -> Result<(), std::io::Error> { + let file = match diff { + Diff::Removed { .. } => None, + Diff::Added { new, .. } => Some(new), + Diff::Unchanged { new, .. } => Some(new), + Diff::Corrupted { new, .. } => Some(new), + Diff::Touched { new, .. } => Some(new), + Diff::Changed { new, .. } => Some(new), + }; + if let Some(file) = file { + let s = serde_json::to_string(file).unwrap(); + writeln!(writer, "{}", s) + } else { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn verify_clap_app() { + use clap::CommandFactory; + Args::command().debug_assert() + } +} |