//! Core functionality. //! //! This module contains Brck's core types and functions. use std::{ cmp::Ordering, fs::OpenOptions, io::{BufReader, Read}, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, }; use chrono::{DateTime, Utc}; use either::Either; use flate2::bufread::GzDecoder; use itertools::{EitherOrBoth, Itertools}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use walkdir::WalkDir; /// Produces an iterator over the regular files below the given path. /// /// The files are sorted by their full path. /// Symlinks are not followed, except for the original `path`. pub fn find_files>( path: P, ) -> impl Iterator> { WalkDir::new(path.as_ref()) .sort_by_path() .into_iter() .filter_ok(|entry| entry.file_type().is_file()) .map_ok(|entry| entry.into_path()) } /// Extension for [WalkDir]. trait WalkDirExt { /// Sort directory entries by full path. /// /// Yiedls "foo bar/buzz" before "foo/buzz". fn sort_by_path(self) -> WalkDir; } impl WalkDirExt for WalkDir { fn sort_by_path(self) -> WalkDir { self.sort_by_key(|a| { if a.file_type().is_dir() { let mut name = a.file_name().to_os_string(); name.push(std::path::MAIN_SEPARATOR.to_string()); name } else { a.file_name().to_os_string() } }) } } /// Returns an iterator over the records of the given database. pub fn read_db>( path: P, ) -> Result>, std::io::Error> { let file = OpenOptions::new().read(true).open(path)?; let buf = BufReader::new(file); let dec = GzDecoder::new(buf); let iter = serde_json::Deserializer::from_reader(dec).into_iter::(); Ok(iter) } /// A record of a file's modification time and hash sum. #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct Record { pub path: PathBuf, pub modified: DateTime, #[serde(with = "hex::serde")] pub sha256: Vec, } impl Record { /// Constructs a `Record` of the given file. /// /// This function reads the given file in chunks of at most `chunk_size` bytes. It checks the /// `terminate` flag before reading each chunk, returning the `RecordError::Interrupt` error /// when set. pub fn from_path>( path: P, chunk_size: u64, terminate: Arc, ) -> Result { let modified = std::fs::symlink_metadata(&path)?.modified()?; let mut file = std::fs::File::open(&path)?; let mut hasher = Sha256::new(); loop { if terminate.load(std::sync::atomic::Ordering::SeqCst) { return Err(RecordError::Interrupt); } let mut reader = std::io::Read::by_ref(&mut file).take(chunk_size); if std::io::copy(&mut reader, &mut hasher)? == 0 { break; } } let hash = hasher.finalize(); Ok(Record { path: path.as_ref().into(), modified: modified.into(), sha256: hash.as_slice().into(), }) } } /// The possible errors of [Record::from_path]. #[derive(Debug)] pub enum RecordError { IO(std::io::Error), Interrupt, } impl std::fmt::Display for RecordError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { RecordError::IO(err) => write!(f, "{}", err), RecordError::Interrupt => write!(f, "Received interrupt signal"), } } } impl std::error::Error for RecordError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { RecordError::IO(err) => Some(err), RecordError::Interrupt => None, } } } impl From for RecordError { fn from(value: std::io::Error) -> Self { RecordError::IO(value) } } /// Returns an iterator over the difference of the given files. pub fn diff(cached: I, found: J) -> impl Iterator>> where I: Iterator>, J: Iterator>, { cached .merge_join_by(found, |cached, found| match (cached, found) { (Err(_), _) => Ordering::Less, (Ok(_), Err(_)) => Ordering::Greater, (Ok(cached), Ok(found)) => cached.path.cmp(&found.path), }) .map(|item| match item { EitherOrBoth::Left(Err(err)) => Err(Either::Left(err)), EitherOrBoth::Right(Err(err)) => Err(Either::Right(err)), EitherOrBoth::Left(Ok(old)) => Ok(Diff::Removed { old }), EitherOrBoth::Right(Ok(new)) => Ok(Diff::Added { new }), EitherOrBoth::Both(Ok(old), Ok(new)) => { if old.modified == new.modified { if old.sha256 == new.sha256 { Ok(Diff::Unchanged { old, new }) } else { Ok(Diff::Corrupted { old, new }) } } else if old.sha256 == new.sha256 { Ok(Diff::Touched { old, new }) } else { Ok(Diff::Changed { old, new }) } } _ => unreachable!(), }) } /// The list of possible types of differences. #[derive(Debug, Clone, Hash, Eq, PartialEq, clap::ValueEnum)] pub enum DiffKind { Added, Touched, Changed, Unchanged, Removed, Corrupted, } /// The difference between two records of a file. #[derive(Debug, Clone, Eq, PartialEq, Serialize)] #[serde(tag = "type", rename_all = "lowercase")] pub enum Diff { // Modeled variants as structs to improve their JSON representation. Added { new: Record }, Touched { old: Record, new: Record }, Changed { old: Record, new: Record }, Unchanged { old: Record, new: Record }, Removed { old: Record }, Corrupted { old: Record, new: Record }, } impl Diff { pub fn kind(&self) -> DiffKind { match self { Diff::Added { .. } => DiffKind::Added, Diff::Touched { .. } => DiffKind::Touched, Diff::Changed { .. } => DiffKind::Changed, Diff::Unchanged { .. } => DiffKind::Unchanged, Diff::Removed { .. } => DiffKind::Removed, Diff::Corrupted { .. } => DiffKind::Corrupted, } } } impl std::fmt::Display for Diff { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Diff::Added { new, .. } => write!(f, "added: {}", new.path.display()), Diff::Touched { old, .. } => write!(f, "touched: {}", old.path.display()), Diff::Changed { old, .. } => write!(f, "changed: {}", old.path.display()), Diff::Unchanged { old, .. } => write!(f, "unchanged: {}", old.path.display()), Diff::Removed { old, .. } => write!(f, "removed: {}", old.path.display()), Diff::Corrupted { old, .. } => write!(f, "corrupted: {}", old.path.display()), } } } #[cfg(test)] mod tests { use testresult::TestResult; use super::*; fn record(path: &str, modified: &str, sha256: &str) -> Record { Record { path: path.into(), modified: DateTime::parse_from_rfc3339(modified).unwrap().into(), sha256: sha256.into(), } } #[test] fn diff_works() -> TestResult { let a = record("a", "2024-03-22T00:00:00Z", "apple"); let b = record("b", "2024-03-22T00:00:00Z", "banana"); let b_touched = record("b", "2024-03-22T01:00:00Z", "banana"); let c = record("c", "2024-03-22T00:00:00Z", "cherry"); let c_changed = record("c", "2024-03-22T01:00:00Z", "cashew"); let d = record("d", "2024-03-22T00:00:00Z", "date"); let e = record("e", "2024-03-22T00:00:00Z", "elderberry"); let f = record("f", "2024-03-22T00:00:00Z", "fig"); let f_corrupted = record("f", "2024-03-22T00:00:00Z", "feijoa"); let db = [ Err("foo"), Ok(b.clone()), Ok(c.clone()), Ok(d.clone()), Ok(e.clone()), Ok(f.clone()), ]; let fs = [ Ok(a.clone()), Ok(b_touched.clone()), Err("bar"), Ok(c_changed.clone()), Ok(d.clone()), Ok(f_corrupted.clone()), ]; let mut result = diff(db.into_iter(), fs.into_iter()); assert_eq!(result.next(), Some(Err(Either::Left("foo")))); assert_eq!(result.next(), Some(Ok(Diff::Added { new: a }))); assert_eq!( result.next(), Some(Ok(Diff::Touched { old: b, new: b_touched })) ); assert_eq!(result.next(), Some(Err(Either::Right("bar")))); assert_eq!( result.next(), Some(Ok(Diff::Changed { old: c, new: c_changed })) ); assert_eq!( result.next(), Some(Ok(Diff::Unchanged { old: d.clone(), new: d })) ); assert_eq!(result.next(), Some(Ok(Diff::Removed { old: e }))); assert_eq!( result.next(), Some(Ok(Diff::Corrupted { old: f, new: f_corrupted })) ); assert_eq!(result.next(), None); Ok(()) } }