summaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
authorStefan Kreutz <mail@skreutz.com>2024-03-24 15:04:09 +0100
committerStefan Kreutz <mail@skreutz.com>2024-03-24 15:04:09 +0100
commitc1fa48e9bd617d70e823efef5d6dcea41b1d2087 (patch)
tree421e69c512ac54bf65495ef23fd7d9ec5a5e67d5 /src/main.rs
downloadbrck-0.1.0.tar
Add initial implementationbrck-0.1.0
Diffstat (limited to 'src/main.rs')
-rw-r--r--src/main.rs343
1 files changed, 343 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..2d26eed
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,343 @@
+#![forbid(unsafe_code)]
+
+use std::{
+ collections::HashMap,
+ num::{NonZeroU64, NonZeroUsize},
+ path::PathBuf,
+ sync::{atomic::AtomicBool, Arc},
+};
+
+use anyhow::{anyhow, Context};
+use clap::Parser;
+use either::Either;
+use flate2::{write::GzEncoder, Compression};
+use itertools::Itertools;
+use parseq::ParallelIterator;
+
+mod core;
+use core::*;
+
+mod fs;
+use fs::*;
+
+// std::process::ExitCode::exit_process is unstable
+// Exit codes should fit u8.
+const EXIT_SUCCESS: i32 = 0;
+const EXIT_FAILURE: i32 = 1;
+const EXIT_INTERRUPT: i32 = 130;
+
+/// A simple bit rot checker for legacy file systems.
+///
+/// On the first invocation, Brck records the modification time and a cryptographic hash sum of each regular file in the current working directory to a .brck file.
+/// On subsequent invocations, Brck compares the recorded files against the current file system.
+///
+/// A file can be added, touched, changed, unchanged, removed, or corrupted.
+/// A file is corrupted if it's content changed but it's modification did not.
+///
+/// Without any options, Brck denies corrupted files.
+/// If Brck finds one or more denied differences, it prints them to standard output and exits >0.
+/// Otherwise, Brck updates the .brck file.
+#[derive(Debug, Parser)]
+#[command(about, version, author)]
+struct Args {
+ /// Increase verbosity.
+ ///
+ /// Specify once to print all changed files.
+ /// Specify twice to print unchanged files, too.
+ /// By default, only denied differences are printed.
+ #[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count)]
+ verbosity: u8,
+
+ /// Do not actually modify the database.
+ #[arg(short = 'n', long)]
+ dry_run: bool,
+
+ /// Specify the number of worker threads.
+ ///
+ /// Defaults to the available parallelism, typically the number of CPUs.
+ #[arg(short, long)]
+ jobs: Option<NonZeroUsize>,
+
+ /// Deny select differences only.
+ ///
+ /// This option accepts a comma-separated list of differences to deny.
+ /// Without this option, only corrupted files are denied.
+ /// With this option, only the specified differences are denied.
+ #[arg(value_enum, short, long, default_values_t = vec![DiffKind::Corrupted], value_delimiter = ',', action = clap::ArgAction::Append)]
+ deny: Vec<DiffKind>,
+
+ /// Enable structured output.
+ #[arg(short = 'J', long)]
+ json: bool,
+
+ /// Print summary to standard error.
+ #[arg(short, long)]
+ summary: bool,
+
+ /// Specify the maximum number of bytes a worker thread may read without checking for an interrupt signal.
+ ///
+ /// Defaults to 1 GiB.
+ #[arg(long)]
+ chunk_size: Option<NonZeroU64>,
+
+ /// Specify the maximum number of files to read ahead.
+ ///
+ /// This should be greater than the number of worker threads.
+ ///
+ /// Defaults to 1024 files per worker thread.
+ #[arg(long)]
+ queue_size: Option<NonZeroUsize>,
+}
+
+fn main() {
+ std::process::exit(match run() {
+ Ok(_) => EXIT_SUCCESS,
+ Err(err) if err.is::<clap::Error>() => {
+ // Use clap's own error formatter
+ eprintln!("{err}");
+ EXIT_FAILURE
+ }
+ Err(err) => {
+ eprintln!("Error: {:?}", err);
+ match err.downcast_ref::<RecordError>() {
+ Some(RecordError::Interrupt) => EXIT_INTERRUPT,
+ _ => EXIT_FAILURE,
+ }
+ }
+ })
+}
+
+fn run() -> Result<(), anyhow::Error> {
+ let terminate = Arc::new(AtomicBool::new(false));
+ let args = Args::try_parse()?;
+
+ // Paths must begin with ./ for correct filtering.
+ let current_dir: PathBuf = std::path::Component::CurDir.as_os_str().into();
+ let db_path = current_dir.join(".brck");
+ let backup_path = current_dir.join(".brck.bak");
+
+ // Creating temporary files in the same directory instead of std::env_temp_dir() to enable atomic rename(2) on POSIX-conform systems.
+ // The drawback is that the operating system won't remove these files automatically.
+ let tmp_db_path = current_dir.join(".brck.tmp");
+ let tmp_backup_path = current_dir.join(".brck.bak.tmp");
+
+ #[cfg(target_os = "openbsd")]
+ {
+ use std::{os::unix::ffi::OsStrExt, path::Path};
+
+ pledge::pledge![Stdio Rpath Wpath Cpath Unveil, Stdio].context("Failed to pledge(2)")?;
+
+ fn unveil<P: AsRef<Path>>(path: P, permissions: &str) -> Result<(), anyhow::Error> {
+ unveil::unveil(path.as_ref().as_os_str().as_bytes(), permissions).with_context(|| {
+ format!(
+ "Failed to unveil(2) {} with permissions {}",
+ path.as_ref().display(),
+ permissions
+ )
+ })
+ }
+
+ unveil(".", "r")?;
+ unveil(&db_path, "rwc")?;
+ unveil(&tmp_db_path, "rwc")?;
+ unveil(&backup_path, "rwc")?;
+ unveil(&tmp_backup_path, "rwc")?;
+ unveil::unveil("", "").context("Failed to disable unveil(2)")?;
+ }
+
+ let t = terminate.clone();
+ ctrlc::try_set_handler(move || {
+ if t.load(std::sync::atomic::Ordering::SeqCst) {
+ eprintln!("Received second interrupt signal");
+ std::process::exit(EXIT_INTERRUPT);
+ }
+ eprintln!("Received interrupt signal");
+ t.store(true, std::sync::atomic::Ordering::SeqCst);
+ })
+ .context("Failed to register interrupt signal handler")?;
+
+ let (db, first_run) = match read_db(&db_path) {
+ Ok(db) => Ok((Either::Left(db), false)),
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+ Ok((Either::Right(std::iter::empty()), true))
+ }
+ Err(err) => Err(err),
+ }
+ .with_context(|| format!("Failed to read database {}", db_path.display()))?;
+ let db = db.map(|item| {
+ item.map_err(|err| {
+ anyhow::Error::new(err).context(format!(
+ "Failed to decode record from database {}",
+ &db_path.display()
+ ))
+ })
+ });
+
+ let tmp = TmpFile::open(&tmp_db_path).with_context(|| {
+ format!(
+ "Failed to create temporary database {}",
+ tmp_db_path.display()
+ )
+ })?;
+ let mut tmp = GzEncoder::new(tmp.file(), Compression::default());
+
+ let jobs = args
+ .jobs
+ .or_else(|| std::thread::available_parallelism().ok())
+ .map(NonZeroUsize::get)
+ .unwrap_or(1);
+ let queue_size = args
+ .queue_size
+ .map(NonZeroUsize::get)
+ .unwrap_or_else(|| jobs.saturating_mul(1024));
+ let chunk_size = args
+ .chunk_size
+ .map(NonZeroU64::get)
+ .unwrap_or(1024 * 1024 * 1024);
+
+ let fs = find_files(&current_dir)
+ .filter_ok(|path| path != &db_path && path != &tmp_db_path && path != &backup_path)
+ .map(|item| {
+ item.map_err(|err| anyhow::Error::new(err).context("Failed to walk filesystem"))
+ })
+ .map_parallel_limit(jobs, queue_size, move |item| {
+ item.and_then(|path| {
+ Record::from_path(&path, chunk_size, terminate.clone())
+ .with_context(|| format!("Failed to read file {}", path.display()))
+ })
+ });
+
+ let mut counter = HashMap::new();
+ let mut denied = 0;
+
+ diff(db, fs).try_for_each(|item| match item {
+ Err(Either::Left(err)) => Err(err),
+ Err(Either::Right(err)) => Err(err),
+ Ok(diff) => {
+ *counter.entry(diff.kind()).or_insert(0) += 1;
+ record(&diff, &mut tmp).with_context(|| {
+ format!(
+ "Failed to write to temporary database {}",
+ tmp_db_path.display()
+ )
+ })?;
+ if args.deny.contains(&diff.kind()) {
+ if args.json {
+ println!("{}", serde_json::to_string(&diff).unwrap());
+ } else {
+ println!("{diff}");
+ }
+ denied += 1;
+ } else if args.verbosity > 1
+ || (diff.kind() != DiffKind::Unchanged && args.verbosity > 0)
+ {
+ if args.json {
+ println!("{}", serde_json::to_string(&diff).unwrap());
+ } else {
+ println!("{diff}");
+ }
+ }
+ Ok(())
+ }
+ })?;
+
+ if args.summary {
+ let total: usize = counter.values().sum();
+ let width = usize::try_from(total.checked_ilog10().unwrap_or(0) + 1).unwrap_or(0);
+ eprintln!(
+ "Removed: {:>width$}",
+ counter.get(&DiffKind::Removed).unwrap_or(&0)
+ );
+ eprintln!(
+ "Added: {:>width$}",
+ counter.get(&DiffKind::Added).unwrap_or(&0)
+ );
+ eprintln!(
+ "Unchanged: {:>width$}",
+ counter.get(&DiffKind::Unchanged).unwrap_or(&0)
+ );
+ eprintln!(
+ "Corrupted: {:>width$}",
+ counter.get(&DiffKind::Corrupted).unwrap_or(&0)
+ );
+ eprintln!(
+ "Touched: {:>width$}",
+ counter.get(&DiffKind::Touched).unwrap_or(&0)
+ );
+ eprintln!(
+ "Changed: {:>width$}",
+ counter.get(&DiffKind::Changed).unwrap_or(&0)
+ );
+ eprintln!("Total: {:>width$}", total);
+ }
+
+ if denied > 0 {
+ return Err(anyhow!(
+ "Found {} denied {}",
+ denied,
+ if denied == 1 {
+ "difference"
+ } else {
+ "differences"
+ }
+ ));
+ }
+
+ if args.dry_run {
+ eprintln!("Exiting due to dry run");
+ return Ok(());
+ }
+
+ tmp.finish().with_context(|| {
+ format!(
+ "Failed to synchronize temporary database {}",
+ tmp_db_path.display()
+ )
+ })?;
+ if !first_run {
+ copy_file(&db_path, tmp_backup_path, &backup_path).with_context(|| {
+ format!(
+ "Failed to backup database {} to {}",
+ db_path.display(),
+ backup_path.display()
+ )
+ })?;
+ }
+ rename_file(&tmp_db_path, &db_path).with_context(|| {
+ format!(
+ "Failed to persist temporary database {} to {}",
+ tmp_db_path.display(),
+ db_path.display()
+ )
+ })?;
+
+ Ok(())
+}
+
+fn record<T: std::io::Write>(diff: &Diff, mut writer: T) -> Result<(), std::io::Error> {
+ let file = match diff {
+ Diff::Removed { .. } => None,
+ Diff::Added { new, .. } => Some(new),
+ Diff::Unchanged { new, .. } => Some(new),
+ Diff::Corrupted { new, .. } => Some(new),
+ Diff::Touched { new, .. } => Some(new),
+ Diff::Changed { new, .. } => Some(new),
+ };
+ if let Some(file) = file {
+ let s = serde_json::to_string(file).unwrap();
+ writeln!(writer, "{}", s)
+ } else {
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn verify_clap_app() {
+ use clap::CommandFactory;
+ Args::command().debug_assert()
+ }
+}
Generated by cgit. See skreutz.com for my tech blog and contact information.