summary | refs | log | tree | commit | diff
path: root/src/core.rs
diff options
context:
space:
mode:
author Stefan Kreutz <mail@skreutz.com> 2024-03-24 15:04:09 +0100
committer Stefan Kreutz <mail@skreutz.com> 2024-03-24 15:04:09 +0100
commit c1fa48e9bd617d70e823efef5d6dcea41b1d2087 (patch)
tree 421e69c512ac54bf65495ef23fd7d9ec5a5e67d5 /src/core.rs
download brck-0.1.0.tar
Add initial implementation brck-0.1.0
Diffstat (limited to 'src/core.rs')
-rw-r--r-- src/core.rs 306
1 files changed, 306 insertions, 0 deletions
diff --git a/src/core.rs b/src/core.rs
new file mode 100644
index 0000000..6ac3ba5
--- /dev/null
+++ b/src/core.rs
@@ -0,0 +1,306 @@
+//! Core functionality.
+//!
+//! This module contains Brck's core types and functions.
+
+use std::{
+ cmp::Ordering,
+ fs::OpenOptions,
+ io::{BufReader, Read},
+ path::{Path, PathBuf},
+ sync::{atomic::AtomicBool, Arc},
+};
+
+use chrono::{DateTime, Utc};
+use either::Either;
+use flate2::bufread::GzDecoder;
+use itertools::{EitherOrBoth, Itertools};
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+use walkdir::WalkDir;
+
+/// Walks the tree rooted at `path` and yields every regular file found in it.
+///
+/// Files come out ordered by their full path (see `WalkDirExt::sort_by_path`).
+/// Symlinks are not followed, except for the original `path`.
+/// Errors reported by the directory walk are passed through unchanged.
+pub fn find_files<P: AsRef<Path>>(
+    path: P,
+) -> impl Iterator<Item = Result<PathBuf, walkdir::Error>> {
+    let walker = WalkDir::new(path.as_ref()).sort_by_path();
+    walker.into_iter().filter_map(|step| match step {
+        // Directories, symlinks and other non-regular entries are dropped silently.
+        Ok(entry) if !entry.file_type().is_file() => None,
+        // Regular files are reduced to their path; walk errors are kept as they are.
+        other => Some(other.map(walkdir::DirEntry::into_path)),
+    })
+}
+
+/// Extension for [WalkDir] that adds an ordering by full path.
+trait WalkDirExt {
+ /// Sorts directory entries so that files are visited in the order of their full paths.
+ ///
+ /// Yields "foo bar/buzz" before "foo/buzz".
+ fn sort_by_path(self) -> WalkDir;
+}
+
+impl WalkDirExt for WalkDir {
+    /// Sorts the entries of each directory by file name, with directory names compared as if
+    /// they ended in the path separator.
+    ///
+    /// The appended separator is what makes the overall walk follow the order of the full
+    /// paths rather than the order of the bare names.
+    fn sort_by_path(self) -> WalkDir {
+        self.sort_by_key(|entry| {
+            let mut key = entry.file_name().to_os_string();
+            if entry.file_type().is_dir() {
+                key.push(std::path::MAIN_SEPARATOR.to_string());
+            }
+            key
+        })
+    }
+}
+
+/// Opens the database at `path` and returns an iterator over its records.
+///
+/// The file is read through a buffered gzip decoder and parsed lazily as a stream of JSON
+/// values, each of which is deserialized into a [Record].
+///
+/// # Errors
+///
+/// Fails with the I/O error if the file cannot be opened. Errors that occur while decoding
+/// or parsing surface later, as `Err` items of the returned iterator.
+pub fn read_db<P: AsRef<Path>>(
+    path: P,
+) -> Result<impl Iterator<Item = Result<Record, serde_json::Error>>, std::io::Error> {
+    let reader = OpenOptions::new()
+        .read(true)
+        .open(path)
+        .map(BufReader::new)?;
+    let records =
+        serde_json::Deserializer::from_reader(GzDecoder::new(reader)).into_iter::<Record>();
+    Ok(records)
+}
+
+/// A record of a file's path, modification time and SHA-256 hash sum.
+#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
+pub struct Record {
+ pub path: PathBuf, // path of the recorded file
+ pub modified: DateTime<Utc>, // modification time of the file, in UTC
+ #[serde(with = "hex::serde")]
+ pub sha256: Vec<u8>, // raw digest bytes; (de)serialized through `hex::serde`
+}
+
+impl Record {
+    /// Constructs a `Record` of the given file.
+    ///
+    /// The file is read in chunks of at most `chunk_size` bytes. The `terminate` flag is
+    /// checked before each chunk is read; when it is set, `RecordError::Interrupt` is returned.
+    /// A `chunk_size` of zero is treated as one byte: a zero-sized chunk would read nothing,
+    /// end the loop at once and yield the digest of empty input for every file.
+    ///
+    /// The modification time is taken from the opened file handle, so it always belongs to
+    /// the very file whose contents are hashed, even when `path` is a symlink.
+    ///
+    /// # Errors
+    ///
+    /// Returns `RecordError::IO` if the file cannot be opened, if its metadata or
+    /// modification time cannot be queried, or if reading fails.
+    pub fn from_path<P: AsRef<Path>>(
+        path: P,
+        chunk_size: u64,
+        terminate: Arc<AtomicBool>,
+    ) -> Result<Record, RecordError> {
+        let mut file = std::fs::File::open(&path)?;
+        // Read the timestamp before hashing: if the file is modified while it is being
+        // hashed, the recorded time is older than the content and the next run notices.
+        let modified = file.metadata()?.modified()?;
+        // Guard against a zero-sized chunk, which would hash nothing at all.
+        let chunk_size = chunk_size.max(1);
+        let mut hasher = Sha256::new();
+        loop {
+            if terminate.load(std::sync::atomic::Ordering::SeqCst) {
+                return Err(RecordError::Interrupt);
+            }
+            // Feed at most one chunk into the hasher; zero bytes copied means end of file.
+            let mut reader = std::io::Read::by_ref(&mut file).take(chunk_size);
+            if std::io::copy(&mut reader, &mut hasher)? == 0 {
+                break;
+            }
+        }
+        let hash = hasher.finalize();
+        Ok(Record {
+            path: path.as_ref().into(),
+            modified: modified.into(),
+            sha256: hash.as_slice().into(),
+        })
+    }
+}
+
+/// The possible errors of [Record::from_path].
+#[derive(Debug)]
+pub enum RecordError {
+ IO(std::io::Error), // an I/O operation on the file failed; carries the underlying error
+ Interrupt, // the `terminate` flag was found set before a chunk was read
+}
+
+impl std::fmt::Display for RecordError {
+    /// Writes the message of the wrapped I/O error, or a fixed text for an interrupt.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::IO(cause) => write!(f, "{}", cause),
+            Self::Interrupt => f.write_str("Received interrupt signal"),
+        }
+    }
+}
+
+impl std::error::Error for RecordError {
+    /// Exposes the wrapped I/O error as the source; an interrupt has no underlying cause.
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        if let Self::IO(cause) = self {
+            Some(cause)
+        } else {
+            None
+        }
+    }
+}
+
+impl From<std::io::Error> for RecordError {
+    /// Wraps an I/O error, which lets `?` convert it inside [Record::from_path].
+    fn from(err: std::io::Error) -> Self {
+        Self::IO(err)
+    }
+}
+
+/// Returns an iterator over the difference of the given files.
+///
+/// `cached` holds the previously stored records and `found` the records of the files
+/// present now. Both inputs must be ordered by full path, the way [find_files] orders its
+/// output; the two streams are merged in a single pass and never buffered.
+///
+/// Paths are compared as plain OS strings, not with `Path`'s own ordering. `Path` compares
+/// component by component and therefore sorts "foo/buzz" before "foo bar/buzz", which is the
+/// opposite of the order produced by [find_files]. Using it here would desynchronise the
+/// merge and report unchanged files as both added and removed.
+///
+/// An error from `cached` is yielded as `Either::Left`, an error from `found` as
+/// `Either::Right`. An error never consumes an item from the other input.
+pub fn diff<I, J, E, F>(cached: I, found: J) -> impl Iterator<Item = Result<Diff, Either<E, F>>>
+where
+    I: Iterator<Item = Result<Record, E>>,
+    J: Iterator<Item = Result<Record, F>>,
+{
+    cached
+        .merge_join_by(found, |cached, found| match (cached, found) {
+            // Errors are emitted on their own: first those of `cached`, then those of `found`.
+            (Err(_), _) => Ordering::Less,
+            (Ok(_), Err(_)) => Ordering::Greater,
+            (Ok(cached), Ok(found)) => cached.path.as_os_str().cmp(found.path.as_os_str()),
+        })
+        .map(|item| match item {
+            EitherOrBoth::Left(Err(err)) => Err(Either::Left(err)),
+            EitherOrBoth::Right(Err(err)) => Err(Either::Right(err)),
+            EitherOrBoth::Left(Ok(old)) => Ok(Diff::Removed { old }),
+            EitherOrBoth::Right(Ok(new)) => Ok(Diff::Added { new }),
+            EitherOrBoth::Both(Ok(old), Ok(new)) => {
+                let same_time = old.modified == new.modified;
+                let same_hash = old.sha256 == new.sha256;
+                Ok(match (same_time, same_hash) {
+                    (true, true) => Diff::Unchanged { old, new },
+                    // Content differs although the timestamp claims nothing was written.
+                    (true, false) => Diff::Corrupted { old, new },
+                    (false, true) => Diff::Touched { old, new },
+                    (false, false) => Diff::Changed { old, new },
+                })
+            }
+            // The comparator only answers `Equal` for two `Ok` records.
+            EitherOrBoth::Both(_, _) => unreachable!(),
+        })
+}
+
+/// The list of possible types of differences.
+#[derive(Debug, Clone, Hash, Eq, PartialEq, clap::ValueEnum)]
+pub enum DiffKind { // NOTE(review): plain `//` comments on purpose; clap may turn variant doc comments into help text
+ Added, // a record exists only among the found files
+ Touched, // modification time differs, hash is equal
+ Changed, // modification time and hash both differ
+ Unchanged, // modification time and hash are both equal
+ Removed, // a record exists only among the cached files
+ Corrupted, // modification time is equal, hash differs
+}
+
+/// The difference between two records of a file: `old` is the cached record, `new` the found one.
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum Diff {
+ // Modeled variants as structs to improve their JSON representation.
+ Added { new: Record }, // found, but not cached
+ Touched { old: Record, new: Record }, // modification time differs, hash is equal
+ Changed { old: Record, new: Record }, // modification time and hash both differ
+ Unchanged { old: Record, new: Record }, // modification time and hash are both equal
+ Removed { old: Record }, // cached, but not found
+ Corrupted { old: Record, new: Record }, // modification time is equal, hash differs
+}
+
+impl Diff {
+    /// Returns the [DiffKind] that names this difference, without its records.
+    pub fn kind(&self) -> DiffKind {
+        use DiffKind as Kind;
+
+        match self {
+            Self::Added { .. } => Kind::Added,
+            Self::Touched { .. } => Kind::Touched,
+            Self::Changed { .. } => Kind::Changed,
+            Self::Unchanged { .. } => Kind::Unchanged,
+            Self::Removed { .. } => Kind::Removed,
+            Self::Corrupted { .. } => Kind::Corrupted,
+        }
+    }
+}
+
+impl std::fmt::Display for Diff {
+    /// Writes the lowercase kind of the difference, a colon and the path of the file.
+    ///
+    /// The path is taken from the new record for additions and from the old record for
+    /// every other kind.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Added { new } => write!(f, "added: {}", new.path.display()),
+            Self::Touched { old, .. } => write!(f, "touched: {}", old.path.display()),
+            Self::Changed { old, .. } => write!(f, "changed: {}", old.path.display()),
+            Self::Unchanged { old, .. } => write!(f, "unchanged: {}", old.path.display()),
+            Self::Removed { old } => write!(f, "removed: {}", old.path.display()),
+            Self::Corrupted { old, .. } => write!(f, "corrupted: {}", old.path.display()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use testresult::TestResult;
+
+    use super::*;
+
+    /// Timestamp shared by all original records.
+    const EARLY: &str = "2024-03-22T00:00:00Z";
+    /// Timestamp of the records whose modification time moved.
+    const LATE: &str = "2024-03-22T01:00:00Z";
+
+    /// Builds a `Record` from literals; the "hash" is simply the bytes of `sha256`.
+    fn record(path: &str, modified: &str, sha256: &str) -> Record {
+        let modified: DateTime<Utc> = DateTime::parse_from_rfc3339(modified).unwrap().into();
+        Record {
+            path: PathBuf::from(path),
+            modified,
+            sha256: sha256.as_bytes().to_vec(),
+        }
+    }
+
+    /// Exercises every kind of difference plus one error on each side.
+    #[test]
+    fn diff_works() -> TestResult {
+        let a = record("a", EARLY, "apple");
+        let b = record("b", EARLY, "banana");
+        let b_touched = record("b", LATE, "banana");
+        let c = record("c", EARLY, "cherry");
+        let c_changed = record("c", LATE, "cashew");
+        let d = record("d", EARLY, "date");
+        let e = record("e", EARLY, "elderberry");
+        let f = record("f", EARLY, "fig");
+        let f_corrupted = record("f", EARLY, "feijoa");
+
+        // The cached side: starts with an error and lacks "a".
+        let db = [
+            Err("foo"),
+            Ok(b.clone()),
+            Ok(c.clone()),
+            Ok(d.clone()),
+            Ok(e.clone()),
+            Ok(f.clone()),
+        ];
+
+        // The found side: has an error in the middle and lacks "e".
+        let fs = [
+            Ok(a.clone()),
+            Ok(b_touched.clone()),
+            Err("bar"),
+            Ok(c_changed.clone()),
+            Ok(d.clone()),
+            Ok(f_corrupted.clone()),
+        ];
+
+        let actual: Vec<_> = diff(db.into_iter(), fs.into_iter()).collect();
+
+        let expected = vec![
+            Err(Either::Left("foo")),
+            Ok(Diff::Added { new: a }),
+            Ok(Diff::Touched {
+                old: b,
+                new: b_touched,
+            }),
+            Err(Either::Right("bar")),
+            Ok(Diff::Changed {
+                old: c,
+                new: c_changed,
+            }),
+            Ok(Diff::Unchanged {
+                old: d.clone(),
+                new: d,
+            }),
+            Ok(Diff::Removed { old: e }),
+            Ok(Diff::Corrupted {
+                old: f,
+                new: f_corrupted,
+            }),
+        ];
+        assert_eq!(actual, expected);
+
+        Ok(())
+    }
+}
Generated by cgit. See skreutz.com for my tech blog and contact information.