diff --git a/Cargo.lock b/Cargo.lock index 794b75ed02..7f45c53f2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -183,6 +194,7 @@ dependencies = [ "cc", "clap", "const_format", + "hashbrown 0.12.3", "itertools", "lazy_static", "libc", @@ -260,12 +272,32 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -291,7 +323,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.11.2", ] [[package]] @@ -690,6 +722,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index b3db13bf4a..e64d630edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ clap = { version = "3.1.8", features = ["cargo", "env", "wrap_help"] } itertools = "0.10.1" typed-arena = "2.0.1" rustc-hash = "1.1.0" +hashbrown = "0.12.3" strsim = "0.10.0" lazy_static = "1.4.0" atty = "0.2.14" diff --git a/src/diff/changes.rs b/src/diff/changes.rs index 5844b9c77f..493b7ff421 100644 --- a/src/diff/changes.rs +++ b/src/diff/changes.rs @@ -26,27 +26,32 @@ impl<'a> ChangeMap<'a> { pub fn get(&self, node: &Syntax<'a>) -> Option> { self.changes.get(&node.id()).copied() } + + pub fn reserve(&mut self, additional: usize) { + self.changes.reserve(additional); + } } pub fn insert_deep_unchanged<'a>( - node: &'a Syntax<'a>, - opposite_node: &'a Syntax<'a>, + lhs: &'a Syntax<'a>, + rhs: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>, ) { - change_map.insert(node, ChangeKind::Unchanged(opposite_node)); + change_map.insert(lhs, ChangeKind::Unchanged(rhs)); + change_map.insert(rhs, ChangeKind::Unchanged(lhs)); - match (node, opposite_node) { + match (lhs, rhs) { ( Syntax::List { - children: node_children, + children: lhs_children, .. }, Syntax::List { - children: opposite_children, + children: rhs_children, .. }, ) => { - for (child, opposite_child) in node_children.iter().zip(opposite_children) { + for (child, opposite_child) in lhs_children.iter().zip(rhs_children) { insert_deep_unchanged(child, opposite_child, change_map); } } diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index f9313c12f6..acacacc0cd 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -1,56 +1,64 @@ //! Implements Dijkstra's algorithm for shortest path, to find an //! optimal and readable diff between two ASTs. -use std::{cmp::Reverse, env}; +use std::{cmp::Reverse, env, iter::successors}; use crate::{ diff::changes::ChangeMap, - diff::graph::{get_set_neighbours, populate_change_map, Edge, Vertex}, + diff::graph::{get_neighbours, populate_change_map, Edge, SeenMap, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; use itertools::Itertools; use radix_heap::RadixHeapMap; -use rustc_hash::FxHashMap; #[derive(Debug)] pub struct ExceededGraphLimit {} -/// Return the shortest route from `start` to the end vertex. -fn shortest_vertex_path<'a, 'b>( - start: &'b Vertex<'a, 'b>, +/// Return the shortest route from the `start` to the end vertex. +/// +/// The vec returned does not return the very last vertex. This is +/// necessary because a route of N vertices only has N-1 edges. +fn shortest_path<'a, 'b>( + start: Vertex<'a, 'b>, vertex_arena: &'b Bump, size_hint: usize, graph_limit: usize, -) -> Result>, ExceededGraphLimit> { +) -> Result)>, ExceededGraphLimit> { // We want to visit nodes with the shortest distance first, but // RadixHeapMap is a max-heap. Ensure nodes are wrapped with // Reverse to flip comparisons. let mut heap: RadixHeapMap, &'b Vertex<'a, 'b>> = RadixHeapMap::new(); - heap.push(Reverse(0), start); + heap.push(Reverse(0), vertex_arena.alloc(start)); - let mut seen = FxHashMap::default(); + let mut seen = SeenMap::default(); seen.reserve(size_hint); + let mut neighbors = Vec::with_capacity(4); + let end: &'b Vertex<'a, 'b> = loop { match heap.pop() { Some((Reverse(distance), current)) => { + if current.is_visited.get() { + continue; + } + current.is_visited.set(true); + if current.is_end() { break current; } - for neighbour in &get_set_neighbours(current, vertex_arena, &mut seen) { - let (edge, next) = neighbour; - let distance_to_next = distance + edge.cost(); + neighbors.clear(); + get_neighbours(current, vertex_arena, &mut seen, &mut neighbors); - let found_shorter_route = match next.predecessor.get() { - Some((prev_shortest, _)) => distance_to_next < prev_shortest, - None => true, - }; + for &(edge, cost, next) in neighbors.iter() { + let distance_to_next = distance + cost; - if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); + if distance_to_next < next.shortest_distance.get() { + next.shortest_distance.set(distance_to_next); + next.pred_edge.set(Some(edge)); + next.pred_vertex.set(Some(current)); heap.push(Reverse(distance_to_next), next); } } @@ -70,113 +78,30 @@ fn shortest_vertex_path<'a, 'b>( heap.len(), ); - let mut current = Some((0, end)); - let mut vertex_route: Vec<&'b Vertex<'a, 'b>> = vec![]; - while let Some((_, node)) = current { - vertex_route.push(node); - current = node.predecessor.get(); - } - + let mut vertex_route = successors( + end.pred_edge.get().zip(end.pred_vertex.get()), + |&(_, node)| node.pred_edge.get().zip(node.pred_vertex.get()), + ) + .collect::>(); vertex_route.reverse(); Ok(vertex_route) } -fn shortest_path_with_edges<'a, 'b>( - route: &[&'b Vertex<'a, 'b>], -) -> Vec<(Edge, &'b Vertex<'a, 'b>)> { - let mut prev = route.first().expect("Expected non-empty route"); - - let mut cost = 0; - let mut res = vec![]; - - for vertex in route.iter().skip(1) { - let edge = edge_between(prev, vertex); - res.push((edge, *prev)); - cost += edge.cost(); - - prev = vertex; - } - debug!("Found a path of {} with cost {}.", route.len(), cost); - - res -} - -/// Return the shortest route from the `start` to the end vertex. -/// -/// The vec returned does not return the very last vertex. This is -/// necessary because a route of N vertices only has N-1 edges. -fn shortest_path<'a, 'b>( - start: Vertex<'a, 'b>, - vertex_arena: &'b Bump, - size_hint: usize, - graph_limit: usize, -) -> Result)>, ExceededGraphLimit> { - let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start.clone()); - let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; - Ok(shortest_path_with_edges(&vertex_path)) -} - -fn edge_between<'a, 'b>(before: &Vertex<'a, 'b>, after: &Vertex<'a, 'b>) -> Edge { - assert_ne!(before, after); - - let mut shortest_edge: Option = None; - if let Some(neighbours) = &*before.neighbours.borrow() { - for neighbour in neighbours { - let (edge, next) = *neighbour; - // If there are multiple edges that can take us to `next`, - // prefer the shortest. - if *next == *after { - let is_shorter = match shortest_edge { - Some(prev_edge) => edge.cost() < prev_edge.cost(), - None => true, - }; - - if is_shorter { - shortest_edge = Some(edge); - } - } - } - } - - if let Some(edge) = shortest_edge { - return edge; - } - - panic!( - "Expected a route between the two vertices {:#?} and {:#?}", - before, after - ); -} - /// What is the total number of AST nodes? fn node_count(root: Option<&Syntax>) -> u32 { - let mut node = root; - let mut count = 0; - while let Some(current_node) = node { - let current_count = match current_node { + successors(root, |node| node.next_sibling()) + .map(|node| match node { Syntax::List { num_descendants, .. } => *num_descendants, Syntax::Atom { .. } => 1, - }; - count += current_count; - - node = current_node.next_sibling(); - } - - count + }) + .sum::() } /// How many top-level AST nodes do we have? fn tree_count(root: Option<&Syntax>) -> u32 { - let mut node = root; - let mut count = 0; - while let Some(current_node) = node { - count += 1; - node = current_node.next_sibling(); - } - - count + successors(root, |node| node.next_sibling()).count() as u32 } pub fn mark_syntax<'a>( @@ -202,7 +127,7 @@ pub fn mark_syntax<'a>( let size_hint = lhs_node_count * rhs_node_count; let start = Vertex::new(lhs_syntax, rhs_syntax); - let vertex_arena = Bump::new(); + let vertex_arena = Bump::with_capacity(size_hint * std::mem::size_of::()); let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?; @@ -218,12 +143,13 @@ pub fn mark_syntax<'a>( .iter() .map(|x| { format!( - "{:20} {:20} --- {:3} {:?}", + "{:20} {:20} --- {:?}", x.1.lhs_syntax + .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), x.1.rhs_syntax + .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.0.cost(), x.0, ) }) @@ -279,12 +205,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![UnchangedNode { - depth_difference: 0 - }] - ); + assert_eq!(actions, vec![UnchangedNode]); } #[test] @@ -320,19 +241,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterUnchangedDelimiter { - depth_difference: 0 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false, - }, - ExitDelimiterBoth, - ] - ); + assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelLhs,]); } #[test] @@ -366,23 +275,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterUnchangedDelimiter { - depth_difference: 0 - }, - NovelAtomRHS { - contiguous: false, - probably_punctuation: false - }, - NovelAtomRHS { - contiguous: false, - probably_punctuation: false - }, - ExitDelimiterBoth, - ] - ); + assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelRhs, NovelRhs]); } #[test] @@ -421,18 +314,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![ - EnterNovelDelimiterRHS { contiguous: false }, - EnterNovelDelimiterLHS { contiguous: false }, - UnchangedNode { - depth_difference: 0 - }, - UnchangedNode { - depth_difference: 0 - }, - ExitDelimiterRHS, - ExitDelimiterLHS, - ], + vec![NovelRhs, NovelLhs, UnchangedNode, UnchangedNode], ); } @@ -459,22 +341,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - UnchangedNode { - depth_difference: 0 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false - }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - ] - ); + assert_eq!(actions, vec![UnchangedNode, NovelLhs, NovelLhs,]); } #[test] @@ -503,17 +370,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterNovelDelimiterLHS { contiguous: false }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - ExitDelimiterLHS, - ] - ); + assert_eq!(actions, vec![NovelLhs, NovelLhs,]); } #[test] @@ -545,21 +402,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterNovelDelimiterLHS { contiguous: false }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - ExitDelimiterLHS, - NovelAtomLHS { - contiguous: true, - probably_punctuation: true - }, - ] - ); + assert_eq!(actions, vec![NovelLhs, NovelLhs, NovelLhs]); } #[test] @@ -586,12 +429,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ReplacedComment { - levenshtein_pct: 84 - }] - ); + assert_eq!(actions, vec![ReplacedComment]); } #[test] @@ -618,12 +456,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ReplacedComment { - levenshtein_pct: 11 - }] - ); + assert_eq!(actions, vec![NovelComment]); } #[test] @@ -658,18 +491,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - ReplacedComment { - levenshtein_pct: 95 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false - } - ] - ); + assert_eq!(actions, vec![ReplacedComment, NovelLhs]); } #[test] diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 23f9c30633..625f9f8f19 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -1,23 +1,79 @@ //! A graph representation for computing tree diffs. use bumpalo::Bump; -use rustc_hash::FxHashMap; +use rustc_hash::FxHasher; use std::{ - cell::{Cell, RefCell}, + cell::Cell, cmp::min, - fmt, - hash::{Hash, Hasher}, + hash::{BuildHasherDefault, Hash, Hasher}, + marker::PhantomData, }; use strsim::normalized_levenshtein; use crate::{ - diff::{ - changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, - stack::Stack, - }, - parse::syntax::{AtomKind, Syntax, SyntaxId}, + diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, + parse::syntax::{AtomKind, Syntax}, }; -use Edge::*; + +/// Compress two `&Syntax` into a pointer. +/// +/// Utilize the LSB as flag since `&Syntax` is aligned. +/// +/// ```text +/// LSB = 0 -> side syntax +/// LSB = 1 -> optional parent syntax +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SideSyntax<'a> { + data: *const u8, + phantom: PhantomData<&'a Syntax<'a>>, +} + +impl<'a> SideSyntax<'a> { + pub fn is_side(&self) -> bool { + self.data as usize & 1 == 0 + } + + pub fn is_parent(&self) -> bool { + self.data as usize & 1 == 1 + } + + pub fn get_side(&self) -> Option<&'a Syntax<'a>> { + if self.is_side() { + let data = self.data; + unsafe { std::mem::transmute(data) } + } else { + None + } + } + + pub fn get_parent(&self) -> Option<&'a Syntax<'a>> { + if self.is_parent() { + let data = self.data.wrapping_sub(1); + unsafe { std::mem::transmute(data) } + } else { + None + } + } + + pub fn from_side(side: &'a Syntax<'a>) -> Self { + Self { + data: side as *const _ as _, + phantom: PhantomData, + } + } + + pub fn from_parent(parent: Option<&'a Syntax<'a>>) -> Self { + Self { + data: (get_ptr(parent) as *const u8).wrapping_add(1), + phantom: PhantomData, + } + } +} + +fn get_ptr(opt: Option<&T>) -> *const T { + opt.map_or(std::ptr::null::(), |t| t as *const _) +} /// A vertex in a directed acyclic graph that represents a diff. /// @@ -32,7 +88,7 @@ use Edge::*; /// ^ ^ /// ``` /// -/// From this vertex, we could take [`Edge::NovelAtomLHS`], bringing +/// From this vertex, we could take [`Edge::NovelLhs`], bringing /// us to this vertex. /// /// ```text @@ -40,372 +96,192 @@ use Edge::*; /// ^ ^ /// ``` /// -/// Alternatively, we could take the [`Edge::NovelAtomRHS`], bringing us +/// Alternatively, we could take the [`Edge::NovelRhs`], bringing us /// to this vertex. /// /// ```text /// LHS: X A RHS: A /// ^ ^ /// ``` -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Vertex<'a, 'b> { - pub neighbours: RefCell)>>>, - pub predecessor: Cell)>>, - pub lhs_syntax: Option<&'a Syntax<'a>>, - pub rhs_syntax: Option<&'a Syntax<'a>>, - parents: Stack>, - lhs_parent_id: Option, - rhs_parent_id: Option, - can_pop_either: bool, + // states + pub is_visited: Cell, + pub shortest_distance: Cell, + pub pred_edge: Cell>, + pub pred_vertex: Cell>>, + + // core info + pub lhs_syntax: SideSyntax<'a>, + pub rhs_syntax: SideSyntax<'a>, + // If we've entered the LHS delimiter and RHS delimiter together, + // we must pop both sides together too. Otherwise we'd consider + // the following case to have no changes. + // + // ```text + // Old: (a b c) + // New: (a b) c + // ``` + // + // If we've entered the LHS delimiter or RHS delimiter separately, + // we can pop either side independently. + // + // So each entered delimiter belongs to one of two types, either + // PopBoth or PopEither. They form two stacks that are represented + // in the following structure. + // + // ```text + // | 3, 2, None + // | |PopEither| |PopEither| <--+ + // | |PopEither| |PopEither| | + // | |PopEither| | + // | | + // | 0, 0, Some ---------+ + // | | PopBoth | | PopBoth | <--+ + // | | + // | 1, 2, Some ---------+ + // | | PopBoth | | PopBoth | + // V |PopEither| |PopEither| + // Top |PopEither| + // ``` + pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, + pop_lhs_cnt: u8, + pop_rhs_cnt: u8, } -impl<'a, 'b> PartialEq for Vertex<'a, 'b> { - fn eq(&self, other: &Self) -> bool { - self.lhs_syntax.map(|node| node.id()) == other.lhs_syntax.map(|node| node.id()) - && self.rhs_syntax.map(|node| node.id()) == other.rhs_syntax.map(|node| node.id()) - // Strictly speaking, we should compare the whole - // EnteredDelimiter stack, not just the immediate - // parents. By taking the immediate parent, we have - // vertices with different stacks that are 'equal'. - // - // This makes the graph traversal path dependent: the - // first vertex we see 'wins', and we use it for deciding - // how we can pop. - // - // In practice this seems to work well. The first vertex - // has the lowest cost, so has the most PopBoth - // occurrences, which is the best outcome. - // - // Handling this properly would require considering many - // more vertices to be distinct, exponentially increasing - // the graph size relative to tree depth. - && self.lhs_parent_id == other.lhs_parent_id - && self.rhs_parent_id == other.rhs_parent_id - // We do want to distinguish whether we can pop each side - // independently though. Without this, if we find a case - // where we can pop sides together, we don't consider the - // case where we get a better diff by popping each side - // separately. - && self.can_pop_either == other.can_pop_either - } -} -impl<'a, 'b> Eq for Vertex<'a, 'b> {} - -impl<'a, 'b> Hash for Vertex<'a, 'b> { - fn hash(&self, state: &mut H) { - self.lhs_syntax.map(|node| node.id()).hash(state); - self.rhs_syntax.map(|node| node.id()).hash(state); - - self.lhs_parent_id.hash(state); - self.rhs_parent_id.hash(state); - self.can_pop_either.hash(state); - } -} - -/// Tracks entering syntax List nodes. -#[derive(Clone, PartialEq)] -enum EnteredDelimiter<'a> { - /// If we've entered the LHS or RHS separately, we can pop either - /// side independently. - /// - /// Assumes that at least one stack is non-empty. - PopEither((Stack<&'a Syntax<'a>>, Stack<&'a Syntax<'a>>)), - /// If we've entered the LHS and RHS together, we must pop both - /// sides together too. Otherwise we'd consider the following case to have no changes. - /// - /// ```text - /// Old: (a b c) - /// New: (a b) c - /// ``` - PopBoth((&'a Syntax<'a>, &'a Syntax<'a>)), -} - -impl<'a> fmt::Debug for EnteredDelimiter<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let desc = match self { - EnteredDelimiter::PopEither((lhs_delims, rhs_delims)) => { - format!( - "PopEither(lhs count: {}, rhs count: {})", - lhs_delims.size(), - rhs_delims.size() - ) - } - EnteredDelimiter::PopBoth(_) => "PopBoth".to_string(), - }; - f.write_str(&desc) +impl<'a, 'b> Vertex<'a, 'b> { + pub fn is_end(&self) -> bool { + self.lhs_syntax.is_parent() + && self.rhs_syntax.is_parent() + && self.pop_both_ancestor.is_none() + && self.pop_lhs_cnt == 0 + && self.pop_rhs_cnt == 0 } -} -fn push_both_delimiters<'a>( - entered: &Stack>, - lhs_delim: &'a Syntax<'a>, - rhs_delim: &'a Syntax<'a>, -) -> Stack> { - entered.push(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) -} - -fn can_pop_either_parent(entered: &Stack) -> bool { - matches!(entered.peek(), Some(EnteredDelimiter::PopEither(_))) -} - -fn try_pop_both<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, &'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) => { - Some((lhs_delim, rhs_delim, entered.pop().unwrap())) + pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { + Vertex { + is_visited: Cell::new(false), + shortest_distance: Cell::new(u32::MAX), + pred_edge: Cell::new(None), + pred_vertex: Cell::new(None), + lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), + rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), + pop_both_ancestor: None, + pop_lhs_cnt: 0, + pop_rhs_cnt: 0, } - _ => None, } -} -fn try_pop_lhs<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match lhs_delims.peek() { - Some(lhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_lhs_delims = lhs_delims.pop().unwrap(); - - if !new_lhs_delims.is_empty() || !rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - new_lhs_delims, - rhs_delims.clone(), - ))); - } - - Some((lhs_delim, entered)) - } - None => None, - }, - _ => None, + fn delim_stack_eq(&self, other: &Vertex<'a, 'b>) -> bool { + // Vertices are pinned so addresses can be used as unique IDs. + get_ptr(self.pop_both_ancestor) == get_ptr(other.pop_both_ancestor) + // We've already checked that LHS/RHS syntaxes are equal. + // Then if their PopBoth ancestors are equal, PopLhs/Rhs + // counts must be equal. + // && self.pop_lhs_cnt == other.pop_lhs_cnt + // && self.pop_rhs_cnt == other.pop_rhs_cnt } -} -fn try_pop_rhs<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match rhs_delims.peek() { - Some(rhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_rhs_delims = rhs_delims.pop().unwrap(); - - if !lhs_delims.is_empty() || !new_rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - lhs_delims.clone(), - new_rhs_delims, - ))); - } - - Some((rhs_delim, entered)) - } - None => None, - }, - _ => None, + fn can_pop_either(&self) -> bool { + self.pop_lhs_cnt != 0 || self.pop_rhs_cnt != 0 } } -fn push_lhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - let mut modifying_head = false; - let (mut lhs_delims, rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - lhs_delims = lhs_delims.push(delimiter); - - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) +impl PartialEq for Vertex<'_, '_> { + fn eq(&self, other: &Self) -> bool { + // Strictly speaking, we should compare the whole + // entered delimiter stack, not just the immediate + // parents. By taking the immediate parent, we have + // vertices with different stacks that are 'equal'. + // + // This makes the graph traversal path dependent: the + // first vertex we see 'wins', and we use it for deciding + // how we can pop. + // + // In practice this seems to work well. The first vertex + // has the lowest cost, so has the most PopBoth + // occurrences, which is the best outcome. + // + // Handling this properly would require considering many + // more vertices to be distinct, exponentially increasing + // the graph size relative to tree depth. + self.lhs_syntax == other.lhs_syntax + && self.rhs_syntax == other.rhs_syntax + // We do want to distinguish whether we can pop each side + // independently though. Without this, if we find a case + // where we can pop sides together, we don't consider the + // case where we get a better diff by popping each side + // separately. + && self.can_pop_either() == other.can_pop_either() + } } -fn push_rhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - let mut modifying_head = false; - let (lhs_delims, mut rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - rhs_delims = rhs_delims.push(delimiter); +impl Eq for Vertex<'_, '_> {} - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) -} - -impl<'a, 'b> Vertex<'a, 'b> { - pub fn is_end(&self) -> bool { - self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty() +impl Hash for Vertex<'_, '_> { + fn hash(&self, state: &mut H) { + self.lhs_syntax.hash(state); + self.rhs_syntax.hash(state); + self.can_pop_either().hash(state); } +} - pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { - let parents = Stack::new(); - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax, - rhs_syntax, - parents, - lhs_parent_id: None, - rhs_parent_id: None, - can_pop_either: false, - } +impl Default for Vertex<'_, '_> { + fn default() -> Self { + Self::new(None, None) } } -/// An edge in our graph, with an associated [`cost`](Edge::cost). +/// An edge in our graph. /// /// A syntax node can always be marked as novel, so a vertex will have /// at least a NovelFoo edge. Depending on the syntax nodes of the /// current [`Vertex`], other edges may also be available. /// -/// See [`neighbours`] for all the edges available for a given `Vertex`. +/// See [`get_neighbours`] for all the edges available for a given +/// `Vertex`. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum Edge { - UnchangedNode { - depth_difference: u32, - }, - EnterUnchangedDelimiter { - depth_difference: u32, - }, - ReplacedComment { - levenshtein_pct: u8, - }, - NovelAtomLHS { - contiguous: bool, - probably_punctuation: bool, - }, - NovelAtomRHS { - contiguous: bool, - probably_punctuation: bool, - }, + UnchangedNode, + EnterUnchangedDelimiter, + ReplacedComment, + NovelComment, + NovelLhs, + NovelRhs, // TODO: An EnterNovelDelimiterBoth edge might help performance // rather doing LHS and RHS separately. - EnterNovelDelimiterLHS { - contiguous: bool, - }, - EnterNovelDelimiterRHS { - contiguous: bool, - }, - ExitDelimiterLHS, - ExitDelimiterRHS, - ExitDelimiterBoth, } -const NOT_CONTIGUOUS_PENALTY: u64 = 50; - -impl Edge { - pub fn cost(self) -> u64 { - match self { - // When we're at the end of a list, there's only one exit - // delimiter possibility, so the cost doesn't matter. We - // choose a non-zero number as it's easier to reason - // about. - ExitDelimiterBoth => 1, - // Choose a higher value for exiting individually. This - // shouldn't matter since entering a novel delimiter is - // already more expensive than entering a matched - // delimiter, but be defensive. - ExitDelimiterLHS | ExitDelimiterRHS => 2, - - // Matching nodes is always best. - UnchangedNode { depth_difference } => min(40, u64::from(depth_difference) + 1), - // Matching an outer delimiter is good. - EnterUnchangedDelimiter { depth_difference } => { - 100 + min(40, u64::from(depth_difference)) - } +use Edge::*; - // Replacing a comment is better than treating it as novel. - ReplacedComment { levenshtein_pct } => 150 + u64::from(100 - levenshtein_pct), +const NOT_CONTIGUOUS_PENALTY: u32 = 50; - // Otherwise, we've added/removed a node. - NovelAtomLHS { - contiguous, - probably_punctuation, - } - | NovelAtomRHS { - contiguous, - probably_punctuation, - } => { - let mut cost = 300; - if !contiguous { - cost += NOT_CONTIGUOUS_PENALTY; - } - // If it's only punctuation, decrease the cost - // slightly. It's better to have novel punctuation - // than novel variable names. - if probably_punctuation { - cost -= 10; - } - cost - } - EnterNovelDelimiterLHS { contiguous } | EnterNovelDelimiterRHS { contiguous } => { - let mut cost = 300; - if !contiguous { - // This needs to be more than 40 greater than the - // contiguous case. Otherwise, we end up choosing - // a badly positioned unchanged delimiter just - // because it has a better depth difference. - // - // TODO: write a test for this case. - cost += NOT_CONTIGUOUS_PENALTY; - } - cost - } - } - } -} +pub type SeenMap<'syn, 'b> = hashbrown::HashMap< + &'b Vertex<'syn, 'b>, + Option<&'b Vertex<'syn, 'b>>, + BuildHasherDefault, +>; fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, + seen: &mut SeenMap<'syn, 'b>, ) -> &'b Vertex<'syn, 'b> { - match seen.get_mut(&v) { - Some(existing) => { - // Don't explore more than two possible parenthesis - // nestings for each syntax node pair. - if let Some(allocated) = existing.last() { - if existing.len() >= 2 { - return *allocated; - } + match seen.get_key_value_mut(&v) { + Some((key, value)) => { + if let Some(existing) = value { + return existing; } - - // If we have seen exactly this graph node before, even - // considering parenthesis matching, return it. - for existing_node in existing.iter() { - if existing_node.parents == v.parents { - return existing_node; - } + if key.delim_stack_eq(&v) { + return key; } - - // We haven't reached the graph node limit yet, allocate a - // new one. let allocated = alloc.alloc(v); - existing.push(allocated); + *value = Some(allocated); allocated } None => { let allocated = alloc.alloc(v); - seen.insert(allocated, vec![allocated]); + seen.insert_unique_unchecked(allocated, None); allocated } } @@ -419,123 +295,137 @@ fn looks_like_punctuation(content: &str) -> bool { content == "," || content == ";" || content == "." } -/// Compute the neighbours of `v` if we haven't previously done so, -/// write them to the .neighbours cell inside `v`, and return them. -pub fn get_set_neighbours<'syn, 'b>( - v: &Vertex<'syn, 'b>, - alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, -) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { - match &*v.neighbours.borrow() { - Some(neighbours) => return neighbours.clone(), - None => {} +#[inline(always)] +fn next_sibling<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { + match syntax.next_sibling() { + Some(sibling) => SideSyntax::from_side(sibling), + None => SideSyntax::from_parent(syntax.parent()), } +} - let mut res: Vec<(Edge, &Vertex)> = vec![]; - - if v.lhs_syntax.is_none() && v.rhs_syntax.is_none() { - if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&v.parents) { - // We have exhausted all the nodes on both lists, so we can - // move up to the parent node. - - // Continue from sibling of parent. - res.push(( - ExitDelimiterBoth, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), - rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), - parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: rhs_parent.parent().map(Syntax::id), - }, - alloc, - seen, - ), - )); - } +#[inline(always)] +fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { + match children.get(0).copied() { + Some(child) => SideSyntax::from_side(child), + None => SideSyntax::from_parent(Some(syntax)), } +} - if v.lhs_syntax.is_none() { - if let Some((lhs_parent, parents_next)) = try_pop_lhs(&v.parents) { - // Move to next after LHS parent. - - // Continue from sibling of parent. - res.push(( - ExitDelimiterLHS, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), - rhs_syntax: v.rhs_syntax, - can_pop_either: can_pop_either_parent(&parents_next), - parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: v.rhs_parent_id, - }, - alloc, - seen, - ), - )); +#[inline(always)] +fn next_vertex<'a, 'b>( + mut lhs_syntax: SideSyntax<'a>, + mut rhs_syntax: SideSyntax<'a>, + mut pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, + mut pop_lhs_cnt: u8, + mut pop_rhs_cnt: u8, +) -> Vertex<'a, 'b> { + loop { + while let (Some(lhs_parent), 1..) = (lhs_syntax.get_parent(), pop_lhs_cnt) { + lhs_syntax = next_sibling(lhs_parent); + pop_lhs_cnt -= 1; } - } - if v.rhs_syntax.is_none() { - if let Some((rhs_parent, parents_next)) = try_pop_rhs(&v.parents) { - // Move to next after RHS parent. - - // Continue from sibling of parent. - res.push(( - ExitDelimiterRHS, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), - parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: rhs_parent.parent().map(Syntax::id), - }, - alloc, - seen, - ), - )); + while let (Some(rhs_parent), 1..) = (rhs_syntax.get_parent(), pop_rhs_cnt) { + rhs_syntax = next_sibling(rhs_parent); + pop_rhs_cnt -= 1; } + + if let (Some(lhs_parent), Some(rhs_parent), Some(ancestor)) = ( + lhs_syntax.get_parent(), + rhs_syntax.get_parent(), + pop_both_ancestor, + ) { + lhs_syntax = next_sibling(lhs_parent); + rhs_syntax = next_sibling(rhs_parent); + pop_both_ancestor = ancestor.pop_both_ancestor; + pop_lhs_cnt = ancestor.pop_lhs_cnt; + pop_rhs_cnt = ancestor.pop_rhs_cnt; + } else { + break; + } + } + + Vertex { + lhs_syntax, + rhs_syntax, + pop_both_ancestor, + pop_lhs_cnt, + pop_rhs_cnt, + ..Default::default() } +} + +#[inline(always)] +fn next_novel<'a>(syntax: &'a Syntax<'a>, pop_cnt: u8) -> (u32, SideSyntax<'a>, u8) { + let mut cost = 300; + // TODO: should this apply if prev is a parent + // node rather than a sibling? + if !syntax.prev_is_contiguous() { + cost += NOT_CONTIGUOUS_PENALTY; + } + + let next_syntax; + let next_pop_cnt; - if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) { + match syntax { + Syntax::Atom { content, .. } => { + // If it's only punctuation, decrease the cost + // slightly. It's better to have novel punctuation + // than novel variable names. + if looks_like_punctuation(content) { + cost -= 10; + } + next_syntax = next_sibling(syntax); + next_pop_cnt = pop_cnt; + } + Syntax::List { children, .. } => { + next_syntax = next_child(syntax, children); + next_pop_cnt = pop_cnt + 1; + } + }; + + (cost, next_syntax, next_pop_cnt) +} + +/// Compute the neighbours of `v`. +pub fn get_neighbours<'syn, 'b>( + v: &'b Vertex<'syn, 'b>, + alloc: &'b Bump, + seen: &mut SeenMap<'syn, 'b>, + neighbors: &mut Vec<(Edge, u32, &'b Vertex<'syn, 'b>)>, +) { + let mut add_neighbor = std::convert::identity( + #[inline(always)] + |edge, cost, vertex| { + let neighbor = allocate_if_new(vertex, alloc, seen); + if !neighbor.is_visited.get() { + neighbors.push((edge, cost, neighbor)); + } + }, + ); + + let v_info = (v.lhs_syntax.get_side(), v.rhs_syntax.get_side()); + + if let (Some(lhs_syntax), Some(rhs_syntax)) = v_info { if lhs_syntax == rhs_syntax { + // Both nodes are equal, the happy case. let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - - // Both nodes are equal, the happy case. - res.push(( - UnchangedNode { depth_difference }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), - parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, - }, - alloc, - seen, + let cost = min(40, depth_difference + 1); + + add_neighbor( + UnchangedNode, + cost, + next_vertex( + next_sibling(lhs_syntax), + next_sibling(rhs_syntax), + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, ), - )); - } - - if let ( + ); + } else if let ( Syntax::List { open_content: lhs_open_content, close_content: lhs_close_content, @@ -552,37 +442,24 @@ pub fn get_set_neighbours<'syn, 'b>( { // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { - let lhs_next = lhs_children.get(0).copied(); - let rhs_next = rhs_children.get(0).copied(); - - // TODO: be consistent between parents_next and next_parents. - let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax); - let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - - res.push(( - EnterUnchangedDelimiter { depth_difference }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_next, - rhs_syntax: rhs_next, - parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: false, - }, - alloc, - seen, + let cost = 100 + min(40, depth_difference); + + add_neighbor( + EnterUnchangedDelimiter, + cost, + next_vertex( + next_child(lhs_syntax, lhs_children), + next_child(rhs_syntax, rhs_children), + Some(v), + 0, + 0, ), - )); + ); } - } - - if let ( + } else if let ( Syntax::Atom { content: lhs_content, kind: AtomKind::Comment, @@ -595,193 +472,103 @@ pub fn get_set_neighbours<'syn, 'b>( }, ) = (lhs_syntax, rhs_syntax) { - // Both sides are comments and their content is reasonably - // similar. - if lhs_content != rhs_content { - let levenshtein_pct = - (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - res.push(( - ReplacedComment { levenshtein_pct }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), - parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, - }, - alloc, - seen, - ), - )); - } + // Both sides are comments and their content is reasonably similar. + let levenshtein_pct = + (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u32; + let cost = 150 + (100 - levenshtein_pct); + + add_neighbor( + if levenshtein_pct > 40 { + ReplacedComment + } else { + NovelComment + }, + cost, + next_vertex( + next_sibling(lhs_syntax), + next_sibling(rhs_syntax), + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, + ), + ); } } - if let Some(lhs_syntax) = &v.lhs_syntax { - match lhs_syntax { - // Step over this novel atom. - Syntax::Atom { content, .. } => { - res.push(( - NovelAtomLHS { - // TODO: should this apply if prev is a parent - // node rather than a sibling? - contiguous: lhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: v.rhs_syntax, - parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, - }, - alloc, - seen, - ), - )); - } - // Step into this partially/fully novel list. - Syntax::List { children, .. } => { - let lhs_next = children.get(0).copied(); - - let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax); - - res.push(( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_next, - rhs_syntax: v.rhs_syntax, - parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: v.rhs_parent_id, - can_pop_either: true, - }, - alloc, - seen, - ), - )); - } - } + if let (Some(lhs_syntax), _) = v_info { + let (cost, next_syntax, next_pop_cnt) = next_novel(lhs_syntax, v.pop_lhs_cnt); + + add_neighbor( + NovelLhs, + cost, + next_vertex( + next_syntax, + v.rhs_syntax, + v.pop_both_ancestor, + next_pop_cnt, + v.pop_rhs_cnt, + ), + ); } - if let Some(rhs_syntax) = &v.rhs_syntax { - match rhs_syntax { - // Step over this novel atom. - Syntax::Atom { content, .. } => { - res.push(( - NovelAtomRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_syntax.next_sibling(), - parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, - }, - alloc, - seen, - ), - )); - } - // Step into this partially/fully novel list. - Syntax::List { children, .. } => { - let rhs_next = children.get(0).copied(); - - let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax); - - res.push(( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, - allocate_if_new( - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_next, - parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: true, - }, - alloc, - seen, - ), - )); - } - } + if let (_, Some(rhs_syntax)) = v_info { + let (cost, next_syntax, next_pop_cnt) = next_novel(rhs_syntax, v.pop_rhs_cnt); + + add_neighbor( + NovelRhs, + cost, + next_vertex( + v.lhs_syntax, + next_syntax, + v.pop_both_ancestor, + v.pop_lhs_cnt, + next_pop_cnt, + ), + ); } - assert!( - !res.is_empty(), - "Must always find some next steps if node is not the end" - ); - - v.neighbours.replace(Some(res.clone())); - res } pub fn populate_change_map<'a, 'b>( route: &[(Edge, &'b Vertex<'a, 'b>)], change_map: &mut ChangeMap<'a>, ) { + change_map.reserve(route.len()); + for (e, v) in route { match e { - ExitDelimiterBoth | ExitDelimiterLHS | ExitDelimiterRHS => { - // Nothing to do: we have already marked this node when we entered it. - } - UnchangedNode { .. } => { + UnchangedNode => { // No change on this node or its children. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); - insert_deep_unchanged(rhs, lhs, change_map); } - EnterUnchangedDelimiter { .. } => { + EnterUnchangedDelimiter => { // No change on the outer delimiter, but children may // have changed. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } - ReplacedComment { levenshtein_pct } => { - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); - - if *levenshtein_pct > 40 { - change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); - change_map.insert(rhs, ChangeKind::ReplacedComment(rhs, lhs)); - } else { - change_map.insert(lhs, ChangeKind::Novel); - change_map.insert(rhs, ChangeKind::Novel); - } + ReplacedComment => { + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); + change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); + change_map.insert(rhs, ChangeKind::ReplacedComment(rhs, lhs)); + } + NovelComment => { + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); + change_map.insert(lhs, ChangeKind::Novel); + change_map.insert(rhs, ChangeKind::Novel); } - NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => { - let lhs = v.lhs_syntax.unwrap(); + NovelLhs => { + let lhs = v.lhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } - NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => { - let rhs = v.rhs_syntax.unwrap(); + NovelRhs => { + let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } } diff --git a/src/diff/mod.rs b/src/diff/mod.rs index 4bd7efd03d..44f10e6c78 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -3,5 +3,4 @@ pub mod dijkstra; mod graph; pub mod myers_diff; pub mod sliders; -mod stack; pub mod unchanged; diff --git a/src/diff/sliders.rs b/src/diff/sliders.rs index dd94cc3d64..ce3f98740b 100644 --- a/src/diff/sliders.rs +++ b/src/diff/sliders.rs @@ -486,7 +486,6 @@ fn slide_to_prev_node<'a>( insert_deep_novel(before_start_node, change_map); insert_deep_unchanged(last_node, opposite, change_map); - insert_deep_unchanged(opposite, last_node, change_map); } } @@ -535,7 +534,6 @@ fn slide_to_next_node<'a>( }; insert_deep_unchanged(start_node, opposite, change_map); - insert_deep_unchanged(opposite, start_node, change_map); insert_deep_novel(after_last_node, change_map); } } diff --git a/src/diff/stack.rs b/src/diff/stack.rs deleted file mode 100644 index b366937a4e..0000000000 --- a/src/diff/stack.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::rc::Rc; - -#[derive(Debug, Clone, Default, PartialEq, Eq)] -struct Node { - val: T, - next: Option>>, -} - -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct Stack { - head: Option>>, -} - -impl Stack { - pub fn new() -> Self { - Self { head: None } - } - - pub fn peek(&self) -> Option<&T> { - self.head.as_deref().map(|n| &n.val) - } - - pub fn pop(&self) -> Option> { - self.head.as_deref().map(|n| Self { - head: n.next.clone(), - }) - } - - pub fn push(&self, v: T) -> Stack { - Self { - head: Some(Rc::new(Node { - val: v, - next: self.head.clone(), - })), - } - } - - // O(n) - pub fn size(&self) -> usize { - let mut res = 0; - let mut node = &self.head; - while let Some(next) = node { - res += 1; - node = &next.next; - } - res - } - - pub fn is_empty(&self) -> bool { - self.head.is_none() - } -} diff --git a/src/diff/unchanged.rs b/src/diff/unchanged.rs index 2b7de0d56e..88042172f8 100644 --- a/src/diff/unchanged.rs +++ b/src/diff/unchanged.rs @@ -71,7 +71,6 @@ fn split_unchanged<'a>( lhs_section_nodes.iter().zip(rhs_section_nodes.iter()) { insert_deep_unchanged(lhs_section_node, rhs_section_node, change_map); - insert_deep_unchanged(rhs_section_node, lhs_section_node, change_map); } } ChangeState::PossiblyChanged => { @@ -408,7 +407,6 @@ fn shrink_unchanged_at_ends<'a>( // trivial unchanged node in the middle. if lhs_node.content_id() == rhs_node.content_id() { insert_deep_unchanged(lhs_node, rhs_node, change_map); - insert_deep_unchanged(rhs_node, lhs_node, change_map); changed = true; lhs_nodes = &lhs_nodes[1..]; @@ -421,7 +419,6 @@ fn shrink_unchanged_at_ends<'a>( while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.last(), rhs_nodes.last()) { if lhs_node.content_id() == rhs_node.content_id() { insert_deep_unchanged(lhs_node, rhs_node, change_map); - insert_deep_unchanged(rhs_node, lhs_node, change_map); changed = true; lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1];