diff --git a/src/lib.rs b/src/lib.rs
index 16bf1bc..b568c4e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@ use number_prefix::NumberPrefix;
 use walkdir::WalkDir;
 
 use std::{
+    collections::BTreeMap,
     fmt::Display,
     path::{Path, PathBuf},
 };
@@ -91,3 +92,70 @@ pub fn format_size(size: u64, binary: bool) -> String {
         NumberPrefix::Prefixed(prefix, number) => format!("{:.2} {}B", number, prefix),
     }
 }
+
+/// Calculate the total size of `entries`, counting every path at most once.
+///
+/// Entries located inside another entry, or repeating an entry already seen, do not add to the
+/// total: their size is already part of the enclosing entry's size. Containment is detected
+/// with a trie keyed by path component, built from the canonicalized form of each path.
+///
+/// Entries whose path cannot be canonicalized are left out of the total.
+pub fn calculate_unique_total_size(entries: &[Entry]) -> u64 {
+    #[derive(Default)]
+    struct TriePathNode {
+        // Child nodes, keyed by the next path component.
+        children: BTreeMap<PathBuf, TriePathNode>,
+        // `Some(size)` when an entry ends exactly at this node, `None` for nodes that only
+        // exist as intermediate components of a longer path.
+        entry_size: Option<u64>,
+    }
+
+    fn trie_recursive_sum(node: &TriePathNode) -> u64 {
+        let children_sum: u64 = node.children.values().map(trie_recursive_sum).sum();
+        node.entry_size.unwrap_or(0) + children_sum
+    }
+
+    let mut trie_root = TriePathNode::default();
+
+    for entry in entries {
+        // Canonicalize the path, silently ignoring failures.
+        // TODO: Review if we should ignore failures.
+        let path = match entry.path.canonicalize() {
+            Ok(path) => path,
+            Err(_) => continue,
+        };
+
+        // Peekable, because the last path piece needs special treatment.
+        let mut path_iter = path.iter().peekable();
+        // Pointer to traverse the trie.
+        let mut current_trie_node = &mut trie_root;
+
+        while let Some(piece) = path_iter.next() {
+            let next_trie_node = current_trie_node
+                .children
+                .entry(PathBuf::from(piece))
+                .or_default();
+
+            // An entry ends at this node, so the current path is either that same entry or is
+            // nested inside of it, and its size was already accounted for. Nodes that are only
+            // a shared prefix (like the root) hold `None` and must not stop the traversal.
+            if next_trie_node.entry_size.is_some() {
+                break;
+            }
+
+            // The last piece is the node that represents the entry itself.
+            if path_iter.peek().is_none() {
+                next_trie_node.entry_size = Some(entry.size);
+                // Drop the previously inserted descendants, so that their sizes are not added
+                // on top of the size of this entry.
+                next_trie_node.children.clear();
+            }
+
+            // Update the pointer to keep traversing the trie.
+            current_trie_node = next_trie_node;
+        }
+    }
+
+    // Traverse the trie to calculate the sum.
+    trie_recursive_sum(&trie_root)
+}
diff --git a/src/main.rs b/src/main.rs
index b5ebd82..a58c9cb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,7 +4,7 @@ use wild;
 mod cli;
 
 use cli::Cli;
-use durt::{format_size, Entry};
+use durt::{calculate_unique_total_size, format_size, Entry};
 
 fn main() {
     #[cfg(windows)]
@@ -60,7 +60,7 @@ fn main() {
         Table::new(" {:>} {:<}")
     };
 
-    let total_size = entries.iter().map(|e| e.size).sum();
+    let total_size = calculate_unique_total_size(&entries);
     let mut omitted_entries = 0;
 
     for entry in entries {