From 7cd3b108572a796dc109cd8e45f0c792bd4dc1e5 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 02:38:30 +0800 Subject: [PATCH 01/58] Simplify push_{lhs,rhs}_delimiter --- src/diff/graph.rs | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 23f9c30633..a599e1f5e7 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -211,44 +211,24 @@ fn push_lhs_delimiter<'a>( entered: &Stack>, delimiter: &'a Syntax<'a>, ) -> Stack> { - let mut modifying_head = false; - let (mut lhs_delims, rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - lhs_delims = lhs_delims.push(delimiter); - - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) + match entered.peek() { + Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( + EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), + ), + _ => entered.push(EnteredDelimiter::PopEither((Stack::new().push(delimiter), Stack::new()))), + } } fn push_rhs_delimiter<'a>( entered: &Stack>, delimiter: &'a Syntax<'a>, ) -> Stack> { - let mut modifying_head = false; - let (lhs_delims, mut rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - rhs_delims = rhs_delims.push(delimiter); - - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) + match entered.peek() { + Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( + EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), + ), + _ => entered.push(EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter)))), + } } impl<'a, 'b> Vertex<'a, 'b> { From 97f9a525bd9907d7f3df4bb2b8924fad4d926ab1 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 05:00:20 +0800 Subject: [PATCH 02/58] Remove field can_pop_either from Vertex --- src/diff/graph.rs | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index a599e1f5e7..bb47cd355d 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -56,7 +56,6 @@ pub struct Vertex<'a, 'b> { parents: Stack>, lhs_parent_id: Option, rhs_parent_id: Option, - can_pop_either: bool, } impl<'a, 'b> PartialEq for Vertex<'a, 'b> { @@ -86,7 +85,7 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { // where we can pop sides together, we don't consider the // case where we get a better diff by popping each side // separately. - && self.can_pop_either == other.can_pop_either + && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} @@ -98,7 +97,7 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { self.lhs_parent_id.hash(state); self.rhs_parent_id.hash(state); - self.can_pop_either.hash(state); + can_pop_either_parent(&self.parents).hash(state); } } @@ -215,7 +214,10 @@ fn push_lhs_delimiter<'a>( Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), ), - _ => entered.push(EnteredDelimiter::PopEither((Stack::new().push(delimiter), Stack::new()))), + _ => entered.push(EnteredDelimiter::PopEither(( + Stack::new().push(delimiter), + Stack::new(), + ))), } } @@ -227,7 +229,10 @@ fn push_rhs_delimiter<'a>( Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), ), - _ => entered.push(EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter)))), + _ => entered.push(EnteredDelimiter::PopEither(( + Stack::new(), + Stack::new().push(delimiter), + ))), } } @@ -246,7 +251,6 @@ impl<'a, 'b> Vertex<'a, 'b> { parents, lhs_parent_id: None, rhs_parent_id: None, - can_pop_either: false, } } } @@ -427,7 +431,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: lhs_parent.next_sibling(), rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: lhs_parent.parent().map(Syntax::id), rhs_parent_id: rhs_parent.parent().map(Syntax::id), @@ -452,7 +455,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: lhs_parent.next_sibling(), rhs_syntax: v.rhs_syntax, - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: lhs_parent.parent().map(Syntax::id), rhs_parent_id: v.rhs_parent_id, @@ -477,7 +479,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: v.lhs_parent_id, rhs_parent_id: rhs_parent.parent().map(Syntax::id), @@ -507,7 +508,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -553,7 +553,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: Some(lhs_syntax.id()), rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: false, }, alloc, seen, @@ -591,7 +590,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -621,7 +619,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -647,7 +644,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: Some(lhs_syntax.id()), rhs_parent_id: v.rhs_parent_id, - can_pop_either: true, }, alloc, seen, @@ -675,7 +671,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -701,7 +696,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: v.lhs_parent_id, rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: true, }, alloc, seen, From 9f1a0ab1e606e7f32c22d42b6652555426c7b1fc Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 05:15:45 +0800 Subject: [PATCH 03/58] Reduce number of branches of Vertex::eq --- src/diff/graph.rs | 60 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bb47cd355d..26365e1f97 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -60,32 +60,40 @@ pub struct Vertex<'a, 'b> { impl<'a, 'b> PartialEq for Vertex<'a, 'b> { fn eq(&self, other: &Self) -> bool { - self.lhs_syntax.map(|node| node.id()) == other.lhs_syntax.map(|node| node.id()) - && self.rhs_syntax.map(|node| node.id()) == other.rhs_syntax.map(|node| node.id()) - // Strictly speaking, we should compare the whole - // EnteredDelimiter stack, not just the immediate - // parents. By taking the immediate parent, we have - // vertices with different stacks that are 'equal'. - // - // This makes the graph traversal path dependent: the - // first vertex we see 'wins', and we use it for deciding - // how we can pop. - // - // In practice this seems to work well. The first vertex - // has the lowest cost, so has the most PopBoth - // occurrences, which is the best outcome. - // - // Handling this properly would require considering many - // more vertices to be distinct, exponentially increasing - // the graph size relative to tree depth. - && self.lhs_parent_id == other.lhs_parent_id - && self.rhs_parent_id == other.rhs_parent_id - // We do want to distinguish whether we can pop each side - // independently though. Without this, if we find a case - // where we can pop sides together, we don't consider the - // case where we get a better diff by popping each side - // separately. - && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) + // Strictly speaking, we should compare the whole + // EnteredDelimiter stack, not just the immediate + // parents. By taking the immediate parent, we have + // vertices with different stacks that are 'equal'. + // + // This makes the graph traversal path dependent: the + // first vertex we see 'wins', and we use it for deciding + // how we can pop. + // + // In practice this seems to work well. The first vertex + // has the lowest cost, so has the most PopBoth + // occurrences, which is the best outcome. + // + // Handling this properly would require considering many + // more vertices to be distinct, exponentially increasing + // the graph size relative to tree depth. + let b0 = match (self.lhs_syntax, other.lhs_syntax) { + (Some(s0), Some(s1)) => s0.id() == s1.id(), + (None, None) => self.lhs_parent_id == other.lhs_parent_id, + _ => false, + }; + let b1 = match (self.rhs_syntax, other.rhs_syntax) { + (Some(s0), Some(s1)) => s0.id() == s1.id(), + (None, None) => self.rhs_parent_id == other.rhs_parent_id, + _ => false, + }; + // We do want to distinguish whether we can pop each side + // independently though. Without this, if we find a case + // where we can pop sides together, we don't consider the + // case where we get a better diff by popping each side + // separately. + let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents); + + b0 && b1 && b2 } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} From d2f5e996b60465ee866ae13677930328740a14b3 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 07:19:47 +0800 Subject: [PATCH 04/58] Compress &Syntax and SyntaxId into a usize --- src/diff/dijkstra.rs | 4 +- src/diff/graph.rs | 170 ++++++++++++++++++++++++------------------- 2 files changed, 98 insertions(+), 76 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index f9313c12f6..936dda7815 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -219,9 +219,9 @@ pub fn mark_syntax<'a>( .map(|x| { format!( "{:20} {:20} --- {:3} {:?}", - x.1.lhs_syntax + x.1.lhs_syntax.get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.1.rhs_syntax + x.1.rhs_syntax.get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), x.0.cost(), x.0, diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 26365e1f97..52cd9ff196 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -7,6 +7,8 @@ use std::{ cmp::min, fmt, hash::{Hash, Hasher}, + marker::PhantomData, + mem::transmute_copy, }; use strsim::normalized_levenshtein; @@ -19,6 +21,61 @@ use crate::{ }; use Edge::*; +/// Compress `&Syntax` and `SyntaxId` into a usize. +/// +/// Utilize the LSB as flag since `Syntax` is aligned. +/// +/// ```text +/// LSB = 0 -> &Syntax +/// LSB = 1 -> SyntaxId * 2 + 1 +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SyntaxRefOrId<'a> { + data: usize, + phantom: PhantomData<&'a Syntax<'a>>, +} + +impl SyntaxRefOrId<'_> { + pub fn is_ref(&self) -> bool { + self.data & 1 == 0 + } + + pub fn is_id(&self) -> bool { + self.data & 1 == 1 + } + + pub fn get_ref<'a>(&self) -> Option<&'a Syntax<'a>> { + if self.is_ref() { + Some(unsafe { transmute_copy(&self.data) }) + } else { + None + } + } +} + +impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { + fn from(s: &'a Syntax<'a>) -> Self { + Self { + data: unsafe { transmute_copy(&s) }, + phantom: PhantomData, + } + } +} + +impl<'a> From> for SyntaxRefOrId<'a> { + fn from(s: Option) -> Self { + Self { + data: s.map_or(0, |n| n.get() as usize) * 2 + 1, + phantom: PhantomData, + } + } +} + +fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { + let parent_id = syntax.parent().map(Syntax::id); + syntax.next_sibling().map_or(parent_id.into(), |n| n.into()) +} + /// A vertex in a directed acyclic graph that represents a diff. /// /// Each vertex represents two pointers: one to the next unmatched LHS @@ -51,11 +108,9 @@ use Edge::*; pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, pub predecessor: Cell)>>, - pub lhs_syntax: Option<&'a Syntax<'a>>, - pub rhs_syntax: Option<&'a Syntax<'a>>, + pub lhs_syntax: SyntaxRefOrId<'a>, + pub rhs_syntax: SyntaxRefOrId<'a>, parents: Stack>, - lhs_parent_id: Option, - rhs_parent_id: Option, } impl<'a, 'b> PartialEq for Vertex<'a, 'b> { @@ -76,35 +131,24 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { // Handling this properly would require considering many // more vertices to be distinct, exponentially increasing // the graph size relative to tree depth. - let b0 = match (self.lhs_syntax, other.lhs_syntax) { - (Some(s0), Some(s1)) => s0.id() == s1.id(), - (None, None) => self.lhs_parent_id == other.lhs_parent_id, - _ => false, - }; - let b1 = match (self.rhs_syntax, other.rhs_syntax) { - (Some(s0), Some(s1)) => s0.id() == s1.id(), - (None, None) => self.rhs_parent_id == other.rhs_parent_id, - _ => false, - }; + // We do want to distinguish whether we can pop each side // independently though. Without this, if we find a case // where we can pop sides together, we don't consider the // case where we get a better diff by popping each side // separately. - let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents); - b0 && b1 && b2 + self.lhs_syntax == other.lhs_syntax + && self.rhs_syntax == other.rhs_syntax + && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} impl<'a, 'b> Hash for Vertex<'a, 'b> { fn hash(&self, state: &mut H) { - self.lhs_syntax.map(|node| node.id()).hash(state); - self.rhs_syntax.map(|node| node.id()).hash(state); - - self.lhs_parent_id.hash(state); - self.rhs_parent_id.hash(state); + self.lhs_syntax.hash(state); + self.rhs_syntax.hash(state); can_pop_either_parent(&self.parents).hash(state); } } @@ -246,7 +290,7 @@ fn push_rhs_delimiter<'a>( impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty() + self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { @@ -254,11 +298,9 @@ impl<'a, 'b> Vertex<'a, 'b> { Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax, - rhs_syntax, + lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), + rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), parents, - lhs_parent_id: None, - rhs_parent_id: None, } } } @@ -425,7 +467,7 @@ pub fn get_set_neighbours<'syn, 'b>( let mut res: Vec<(Edge, &Vertex)> = vec![]; - if v.lhs_syntax.is_none() && v.rhs_syntax.is_none() { + if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&v.parents) { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -437,11 +479,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), - rhs_syntax: rhs_parent.next_sibling(), + lhs_syntax: next_syntax(lhs_parent), + rhs_syntax: next_syntax(rhs_parent), parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: rhs_parent.parent().map(Syntax::id), }, alloc, seen, @@ -450,7 +490,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if v.lhs_syntax.is_none() { + if v.lhs_syntax.is_id() { if let Some((lhs_parent, parents_next)) = try_pop_lhs(&v.parents) { // Move to next after LHS parent. @@ -461,11 +501,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), + lhs_syntax: next_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -474,7 +512,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if v.rhs_syntax.is_none() { + if v.rhs_syntax.is_id() { if let Some((rhs_parent, parents_next)) = try_pop_rhs(&v.parents) { // Move to next after RHS parent. @@ -486,10 +524,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_parent.next_sibling(), + rhs_syntax: next_syntax(rhs_parent), parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: rhs_parent.parent().map(Syntax::id), }, alloc, seen, @@ -498,7 +534,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) { + if let (Some(lhs_syntax), Some(rhs_syntax)) = (v.lhs_syntax.get_ref(), v.rhs_syntax.get_ref()) { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -511,11 +547,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -556,11 +590,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_next, - rhs_syntax: rhs_next, + lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), + rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: Some(rhs_syntax.id()), }, alloc, seen, @@ -593,11 +625,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -607,7 +637,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let Some(lhs_syntax) = &v.lhs_syntax { + if let Some(lhs_syntax) = v.lhs_syntax.get_ref() { match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -622,11 +652,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -647,11 +675,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_next, + lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: v.rhs_syntax, parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -661,7 +687,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let Some(rhs_syntax) = &v.rhs_syntax { + if let Some(rhs_syntax) = v.rhs_syntax.get_ref() { match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -675,10 +701,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_syntax.next_sibling(), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -700,10 +724,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_next, + rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: Some(rhs_syntax.id()), }, alloc, seen, @@ -732,8 +754,8 @@ pub fn populate_change_map<'a, 'b>( } UnchangedNode { .. } => { // No change on this node or its children. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); insert_deep_unchanged(rhs, lhs, change_map); @@ -741,14 +763,14 @@ pub fn populate_change_map<'a, 'b>( EnterUnchangedDelimiter { .. } => { // No change on the outer delimiter, but children may // have changed. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } ReplacedComment { levenshtein_pct } => { - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); if *levenshtein_pct > 40 { change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); @@ -759,11 +781,11 @@ pub fn populate_change_map<'a, 'b>( } } NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => { - let lhs = v.lhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => { - let rhs = v.rhs_syntax.unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } } From b635df4538471276aff175df204904b502a99334 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 07:57:25 +0800 Subject: [PATCH 05/58] Simplify get_set_neighbours using Default::default --- src/diff/graph.rs | 75 ++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 52cd9ff196..f720da4f68 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -104,7 +104,7 @@ fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { /// LHS: X A RHS: A /// ^ ^ /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq)] pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, pub predecessor: Cell)>>, @@ -113,6 +113,23 @@ pub struct Vertex<'a, 'b> { parents: Stack>, } +impl<'a, 'b> Vertex<'a, 'b> { + pub fn is_end(&self) -> bool { + self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() + } + + pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { + let parents = Stack::new(); + Vertex { + neighbours: RefCell::new(None), + predecessor: Cell::new(None), + lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), + rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), + parents, + } + } +} + impl<'a, 'b> PartialEq for Vertex<'a, 'b> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole @@ -143,7 +160,6 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } -impl<'a, 'b> Eq for Vertex<'a, 'b> {} impl<'a, 'b> Hash for Vertex<'a, 'b> { fn hash(&self, state: &mut H) { @@ -153,8 +169,14 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { } } +impl<'a, 'b> Default for Vertex<'a, 'b> { + fn default() -> Self { + Self::new(None, None) + } +} + /// Tracks entering syntax List nodes. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq)] enum EnteredDelimiter<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. @@ -288,23 +310,6 @@ fn push_rhs_delimiter<'a>( } } -impl<'a, 'b> Vertex<'a, 'b> { - pub fn is_end(&self) -> bool { - self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() - } - - pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { - let parents = Stack::new(); - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), - rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), - parents, - } - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -477,11 +482,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterBoth, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_parent), rhs_syntax: next_syntax(rhs_parent), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -499,11 +503,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterLHS, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -521,11 +524,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterRHS, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: next_syntax(rhs_parent), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -545,11 +547,10 @@ pub fn get_set_neighbours<'syn, 'b>( UnchangedNode { depth_difference }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -588,11 +589,10 @@ pub fn get_set_neighbours<'syn, 'b>( EnterUnchangedDelimiter { depth_difference }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -623,11 +623,10 @@ pub fn get_set_neighbours<'syn, 'b>( ReplacedComment { levenshtein_pct }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -650,11 +649,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -673,11 +671,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: v.rhs_syntax, parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -698,11 +695,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -721,11 +717,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, + ..Vertex::default() }, alloc, seen, From 69b9214a01b705cd5faaaefe9e905d278f498265 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:11:17 +0800 Subject: [PATCH 06/58] Simplify get_set_neighbours by extracting functions --- src/diff/graph.rs | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f720da4f68..7558a7df47 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -71,11 +71,18 @@ impl<'a> From> for SyntaxRefOrId<'a> { } } -fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { +fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { let parent_id = syntax.parent().map(Syntax::id); syntax.next_sibling().map_or(parent_id.into(), |n| n.into()) } +fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SyntaxRefOrId<'a> { + children + .get(0) + .copied() + .map_or(Some(syntax.id()).into(), |s| s.into()) +} + /// A vertex in a directed acyclic graph that represents a diff. /// /// Each vertex represents two pointers: one to the next unmatched LHS @@ -482,8 +489,8 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterBoth, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_parent), - rhs_syntax: next_syntax(rhs_parent), + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), parents: parents_next, ..Vertex::default() }, @@ -503,7 +510,7 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterLHS, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_parent), + lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, ..Vertex::default() @@ -525,7 +532,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_syntax(rhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), parents: parents_next, ..Vertex::default() }, @@ -547,8 +554,8 @@ pub fn get_set_neighbours<'syn, 'b>( UnchangedNode { depth_difference }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), - rhs_syntax: next_syntax(rhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -575,9 +582,6 @@ pub fn get_set_neighbours<'syn, 'b>( { // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { - let lhs_next = lhs_children.get(0).copied(); - let rhs_next = rhs_children.get(0).copied(); - // TODO: be consistent between parents_next and next_parents. let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax); @@ -589,8 +593,8 @@ pub fn get_set_neighbours<'syn, 'b>( EnterUnchangedDelimiter { depth_difference }, allocate_if_new( Vertex { - lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), - rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), + lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), + rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), parents: parents_next, ..Vertex::default() }, @@ -623,8 +627,8 @@ pub fn get_set_neighbours<'syn, 'b>( ReplacedComment { levenshtein_pct }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), - rhs_syntax: next_syntax(rhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -649,7 +653,7 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), ..Vertex::default() @@ -661,8 +665,6 @@ pub fn get_set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let lhs_next = children.get(0).copied(); - let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax); res.push(( @@ -671,7 +673,7 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), + lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, parents: parents_next, ..Vertex::default() @@ -696,7 +698,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_syntax(rhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -707,8 +709,6 @@ pub fn get_set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let rhs_next = children.get(0).copied(); - let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax); res.push(( @@ -718,7 +718,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), + rhs_syntax: next_child_syntax(rhs_syntax, children), parents: parents_next, ..Vertex::default() }, From 827e26d2fc0077ceb359a191ae946592482e0e59 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:32:20 +0800 Subject: [PATCH 07/58] Eliminate some vec clones --- src/diff/dijkstra.rs | 32 +++++++++++++++++++------------- src/diff/graph.rs | 12 +++++------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 936dda7815..0c6b59e787 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,7 +5,7 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{get_set_neighbours, populate_change_map, Edge, Vertex}, + diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; @@ -40,18 +40,22 @@ fn shortest_vertex_path<'a, 'b>( break current; } - for neighbour in &get_set_neighbours(current, vertex_arena, &mut seen) { - let (edge, next) = neighbour; - let distance_to_next = distance + edge.cost(); + set_neighbours(current, vertex_arena, &mut seen); - let found_shorter_route = match next.predecessor.get() { - Some((prev_shortest, _)) => distance_to_next < prev_shortest, - None => true, - }; + if let Some(neighbors) = &*current.neighbours.borrow() { + for neighbour in neighbors { + let (edge, next) = neighbour; + let distance_to_next = distance + edge.cost(); - if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); - heap.push(Reverse(distance_to_next), next); + let found_shorter_route = match next.predecessor.get() { + Some((prev_shortest, _)) => distance_to_next < prev_shortest, + None => true, + }; + + if found_shorter_route { + next.predecessor.replace(Some((distance_to_next, current))); + heap.push(Reverse(distance_to_next), next); + } } } @@ -219,9 +223,11 @@ pub fn mark_syntax<'a>( .map(|x| { format!( "{:20} {:20} --- {:3} {:?}", - x.1.lhs_syntax.get_ref() + x.1.lhs_syntax + .get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.1.rhs_syntax.get_ref() + x.1.rhs_syntax + .get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), x.0.cost(), x.0, diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7558a7df47..9a2529698e 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -467,14 +467,13 @@ fn looks_like_punctuation(content: &str) -> bool { /// Compute the neighbours of `v` if we haven't previously done so, /// write them to the .neighbours cell inside `v`, and return them. -pub fn get_set_neighbours<'syn, 'b>( +pub fn set_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, -) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { - match &*v.neighbours.borrow() { - Some(neighbours) => return neighbours.clone(), - None => {} +) { + if v.neighbours.borrow().is_some() { + return; } let mut res: Vec<(Edge, &Vertex)> = vec![]; @@ -734,8 +733,7 @@ pub fn get_set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - v.neighbours.replace(Some(res.clone())); - res + v.neighbours.replace(Some(res)); } pub fn populate_change_map<'a, 'b>( From 1631eb220cc8c71e7643b7ff3cd5224661e65389 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:39:03 +0800 Subject: [PATCH 08/58] Split predecessor to two parts --- src/diff/dijkstra.rs | 17 ++++++----------- src/diff/graph.rs | 4 +++- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 0c6b59e787..7c936410c6 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -43,17 +43,12 @@ fn shortest_vertex_path<'a, 'b>( set_neighbours(current, vertex_arena, &mut seen); if let Some(neighbors) = &*current.neighbours.borrow() { - for neighbour in neighbors { - let (edge, next) = neighbour; + for &(edge, next) in neighbors { let distance_to_next = distance + edge.cost(); - let found_shorter_route = match next.predecessor.get() { - Some((prev_shortest, _)) => distance_to_next < prev_shortest, - None => true, - }; - - if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); + if distance_to_next < next.shortest_distance.get() { + next.shortest_distance.set(distance_to_next); + next.predecessor.replace(Some(current)); heap.push(Reverse(distance_to_next), next); } } @@ -74,9 +69,9 @@ fn shortest_vertex_path<'a, 'b>( heap.len(), ); - let mut current = Some((0, end)); + let mut current = Some(end); let mut vertex_route: Vec<&'b Vertex<'a, 'b>> = vec![]; - while let Some((_, node)) = current { + while let Some(node) = current { vertex_route.push(node); current = node.predecessor.get(); } diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 9a2529698e..59c96171d5 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -114,7 +114,8 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> #[derive(Debug, Clone, Eq)] pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, - pub predecessor: Cell)>>, + pub shortest_distance: Cell, + pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, parents: Stack>, @@ -129,6 +130,7 @@ impl<'a, 'b> Vertex<'a, 'b> { let parents = Stack::new(); Vertex { neighbours: RefCell::new(None), + shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), From 9c551abe6876a1514d6505076257faa7ffe1e2db Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:59:16 +0800 Subject: [PATCH 09/58] Change a RefCell in Vertex to UnsafeCell --- src/diff/dijkstra.rs | 9 ++++----- src/diff/graph.rs | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 7c936410c6..2c2b911968 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -42,7 +42,7 @@ fn shortest_vertex_path<'a, 'b>( set_neighbours(current, vertex_arena, &mut seen); - if let Some(neighbors) = &*current.neighbours.borrow() { + if let Some(neighbors) = unsafe { &*current.neighbours.get() } { for &(edge, next) in neighbors { let distance_to_next = distance + edge.cost(); @@ -110,7 +110,7 @@ fn shortest_path<'a, 'b>( size_hint: usize, graph_limit: usize, ) -> Result)>, ExceededGraphLimit> { - let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start.clone()); + let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start); let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; Ok(shortest_path_with_edges(&vertex_path)) } @@ -119,9 +119,8 @@ fn edge_between<'a, 'b>(before: &Vertex<'a, 'b>, after: &Vertex<'a, 'b>) -> Edge assert_ne!(before, after); let mut shortest_edge: Option = None; - if let Some(neighbours) = &*before.neighbours.borrow() { - for neighbour in neighbours { - let (edge, next) = *neighbour; + if let Some(neighbours) = unsafe { &*before.neighbours.get() } { + for &(edge, next) in neighbours { // If there are multiple edges that can take us to `next`, // prefer the shortest. if *next == *after { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 59c96171d5..7879071f7f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -3,7 +3,7 @@ use bumpalo::Bump; use rustc_hash::FxHashMap; use std::{ - cell::{Cell, RefCell}, + cell::{Cell, UnsafeCell}, cmp::min, fmt, hash::{Hash, Hasher}, @@ -62,7 +62,7 @@ impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { } } -impl<'a> From> for SyntaxRefOrId<'a> { +impl From> for SyntaxRefOrId<'_> { fn from(s: Option) -> Self { Self { data: s.map_or(0, |n| n.get() as usize) * 2 + 1, @@ -111,9 +111,10 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> /// LHS: X A RHS: A /// ^ ^ /// ``` -#[derive(Debug, Clone, Eq)] +#[derive(Debug)] pub struct Vertex<'a, 'b> { - pub neighbours: RefCell)>>>, + // TODO: how to design the boundary of unsafe? + pub neighbours: UnsafeCell)>>>, pub shortest_distance: Cell, pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, @@ -129,7 +130,7 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { let parents = Stack::new(); Vertex { - neighbours: RefCell::new(None), + neighbours: UnsafeCell::new(None), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), @@ -139,7 +140,7 @@ impl<'a, 'b> Vertex<'a, 'b> { } } -impl<'a, 'b> PartialEq for Vertex<'a, 'b> { +impl PartialEq for Vertex<'_, '_> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole // EnteredDelimiter stack, not just the immediate @@ -170,7 +171,9 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { } } -impl<'a, 'b> Hash for Vertex<'a, 'b> { +impl Eq for Vertex<'_, '_> {} + +impl Hash for Vertex<'_, '_> { fn hash(&self, state: &mut H) { self.lhs_syntax.hash(state); self.rhs_syntax.hash(state); @@ -178,7 +181,7 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { } } -impl<'a, 'b> Default for Vertex<'a, 'b> { +impl Default for Vertex<'_, '_> { fn default() -> Self { Self::new(None, None) } @@ -474,7 +477,7 @@ pub fn set_neighbours<'syn, 'b>( alloc: &'b Bump, seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, ) { - if v.neighbours.borrow().is_some() { + if unsafe { (&*v.neighbours.get()).is_some() } { return; } @@ -735,7 +738,7 @@ pub fn set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - v.neighbours.replace(Some(res)); + unsafe { *v.neighbours.get() = Some(res) }; } pub fn populate_change_map<'a, 'b>( From 7d36c5f807e9bf361f77a2ac0cdccca63ae7bf1d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 11:36:55 +0800 Subject: [PATCH 10/58] Refactor seen map --- src/diff/graph.rs | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7879071f7f..136ad38faf 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -425,38 +425,28 @@ impl Edge { } } +type SeenMap<'syn, 'b> = + FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; + fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, + seen: &mut SeenMap<'syn, 'b>, ) -> &'b Vertex<'syn, 'b> { match seen.get_mut(&v) { - Some(existing) => { - // Don't explore more than two possible parenthesis - // nestings for each syntax node pair. - if let Some(allocated) = existing.last() { - if existing.len() >= 2 { - return *allocated; - } - } - - // If we have seen exactly this graph node before, even - // considering parenthesis matching, return it. - for existing_node in existing.iter() { - if existing_node.parents == v.parents { - return existing_node; - } + Some(existing) => match existing { + (Some(_), Some(v2)) => v2, + (Some(v1), None) if v1.parents == v.parents => v1, + (Some(_), sv2) => { + let allocated = alloc.alloc(v); + *sv2 = Some(allocated); + allocated } - - // We haven't reached the graph node limit yet, allocate a - // new one. - let allocated = alloc.alloc(v); - existing.push(allocated); - allocated - } + _ => unreachable!(), + }, None => { let allocated = alloc.alloc(v); - seen.insert(allocated, vec![allocated]); + seen.insert(allocated, (Some(allocated), None)); allocated } } @@ -475,7 +465,7 @@ fn looks_like_punctuation(content: &str) -> bool { pub fn set_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, + seen: &mut SeenMap<'syn, 'b>, ) { if unsafe { (&*v.neighbours.get()).is_some() } { return; From 2293886263222a8a7487345060eb23cb9512991c Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 12:51:07 +0800 Subject: [PATCH 11/58] Minor fixes --- src/diff/graph.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 136ad38faf..300b0b82c5 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -35,7 +35,7 @@ pub struct SyntaxRefOrId<'a> { phantom: PhantomData<&'a Syntax<'a>>, } -impl SyntaxRefOrId<'_> { +impl<'a> SyntaxRefOrId<'a> { pub fn is_ref(&self) -> bool { self.data & 1 == 0 } @@ -44,7 +44,7 @@ impl SyntaxRefOrId<'_> { self.data & 1 == 1 } - pub fn get_ref<'a>(&self) -> Option<&'a Syntax<'a>> { + pub fn get_ref(&self) -> Option<&'a Syntax<'a>> { if self.is_ref() { Some(unsafe { transmute_copy(&self.data) }) } else { @@ -56,7 +56,7 @@ impl SyntaxRefOrId<'_> { impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { fn from(s: &'a Syntax<'a>) -> Self { Self { - data: unsafe { transmute_copy(&s) }, + data: s as *const _ as _, phantom: PhantomData, } } @@ -77,10 +77,8 @@ fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { } fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SyntaxRefOrId<'a> { - children - .get(0) - .copied() - .map_or(Some(syntax.id()).into(), |s| s.into()) + let child = children.get(0).copied(); + child.map_or(Some(syntax.id()).into(), |s| s.into()) } /// A vertex in a directed acyclic graph that represents a diff. From 2c6b7060a35a088e4a194fde4c51b18004f64c7f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 19:20:22 +0800 Subject: [PATCH 12/58] Refactor parents representation --- src/diff/graph.rs | 215 ++++++++++++++++++---------------------------- src/diff/stack.rs | 20 ++--- 2 files changed, 94 insertions(+), 141 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 300b0b82c5..21255ef77f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -5,7 +5,6 @@ use rustc_hash::FxHashMap; use std::{ cell::{Cell, UnsafeCell}, cmp::min, - fmt, hash::{Hash, Hasher}, marker::PhantomData, mem::transmute_copy, @@ -117,25 +116,34 @@ pub struct Vertex<'a, 'b> { pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, - parents: Stack>, + lhs_parents: Stack>, + rhs_parents: Stack>, } impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() + self.lhs_syntax.is_id() + && self.rhs_syntax.is_id() + && self.lhs_parents.is_empty() + && self.rhs_parents.is_empty() } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { - let parents = Stack::new(); Vertex { neighbours: UnsafeCell::new(None), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), - parents, + lhs_parents: Stack::new(), + rhs_parents: Stack::new(), } } + + fn can_pop_either_parent(&self) -> bool { + matches!(self.lhs_parents.peek(), Some(PopEither(_))) + || matches!(self.rhs_parents.peek(), Some(PopEither(_))) + } } impl PartialEq for Vertex<'_, '_> { @@ -165,7 +173,7 @@ impl PartialEq for Vertex<'_, '_> { self.lhs_syntax == other.lhs_syntax && self.rhs_syntax == other.rhs_syntax - && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) + && self.can_pop_either_parent() == other.can_pop_either_parent() } } @@ -175,7 +183,7 @@ impl Hash for Vertex<'_, '_> { fn hash(&self, state: &mut H) { self.lhs_syntax.hash(state); self.rhs_syntax.hash(state); - can_pop_either_parent(&self.parents).hash(state); + self.can_pop_either_parent().hash(state); } } @@ -186,13 +194,13 @@ impl Default for Vertex<'_, '_> { } /// Tracks entering syntax List nodes. -#[derive(Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] enum EnteredDelimiter<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. /// /// Assumes that at least one stack is non-empty. - PopEither((Stack<&'a Syntax<'a>>, Stack<&'a Syntax<'a>>)), + PopEither(&'a Syntax<'a>), /// If we've entered the LHS and RHS together, we must pop both /// sides together too. Otherwise we'd consider the following case to have no changes. /// @@ -200,126 +208,59 @@ enum EnteredDelimiter<'a> { /// Old: (a b c) /// New: (a b) c /// ``` - PopBoth((&'a Syntax<'a>, &'a Syntax<'a>)), + PopBoth(&'a Syntax<'a>), } -impl<'a> fmt::Debug for EnteredDelimiter<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let desc = match self { - EnteredDelimiter::PopEither((lhs_delims, rhs_delims)) => { - format!( - "PopEither(lhs count: {}, rhs count: {})", - lhs_delims.size(), - rhs_delims.size() - ) - } - EnteredDelimiter::PopBoth(_) => "PopBoth".to_string(), - }; - f.write_str(&desc) - } -} +use EnteredDelimiter::*; -fn push_both_delimiters<'a>( - entered: &Stack>, +fn push_both<'a>( + lhs_entered: &Stack>, + rhs_entered: &Stack>, lhs_delim: &'a Syntax<'a>, rhs_delim: &'a Syntax<'a>, -) -> Stack> { - entered.push(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) -} - -fn can_pop_either_parent(entered: &Stack) -> bool { - matches!(entered.peek(), Some(EnteredDelimiter::PopEither(_))) +) -> (Stack>, Stack>) { + ( + lhs_entered.push(PopBoth(lhs_delim)), + rhs_entered.push(PopBoth(rhs_delim)), + ) } -fn try_pop_both<'a>( +fn push_either<'a>( entered: &Stack>, -) -> Option<(&'a Syntax<'a>, &'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) => { - Some((lhs_delim, rhs_delim, entered.pop().unwrap())) - } - _ => None, - } + delim: &'a Syntax<'a>, +) -> Stack> { + entered.push(PopEither(delim)) } -fn try_pop_lhs<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match lhs_delims.peek() { - Some(lhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_lhs_delims = lhs_delims.pop().unwrap(); - - if !new_lhs_delims.is_empty() || !rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - new_lhs_delims, - rhs_delims.clone(), - ))); - } - - Some((lhs_delim, entered)) - } - None => None, - }, +fn try_pop_both<'a>( + lhs_entered: &Stack>, + rhs_entered: &Stack>, +) -> Option<( + &'a Syntax<'a>, + &'a Syntax<'a>, + Stack>, + Stack>, +)> { + match (lhs_entered.peek(), rhs_entered.peek()) { + (Some(PopBoth(lhs_delim)), Some(PopBoth(rhs_delim))) => Some(( + lhs_delim, + rhs_delim, + lhs_entered.pop().unwrap(), + rhs_entered.pop().unwrap(), + )), _ => None, } } -fn try_pop_rhs<'a>( +fn try_pop_either<'a>( entered: &Stack>, ) -> Option<(&'a Syntax<'a>, Stack>)> { match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match rhs_delims.peek() { - Some(rhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_rhs_delims = rhs_delims.pop().unwrap(); - - if !lhs_delims.is_empty() || !new_rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - lhs_delims.clone(), - new_rhs_delims, - ))); - } - - Some((rhs_delim, entered)) - } - None => None, - }, + Some(PopEither(delim)) => Some((delim, entered.pop().unwrap())), _ => None, } } -fn push_lhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( - EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), - ), - _ => entered.push(EnteredDelimiter::PopEither(( - Stack::new().push(delimiter), - Stack::new(), - ))), - } -} - -fn push_rhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( - EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), - ), - _ => entered.push(EnteredDelimiter::PopEither(( - Stack::new(), - Stack::new().push(delimiter), - ))), - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -434,11 +375,14 @@ fn allocate_if_new<'syn, 'b>( match seen.get_mut(&v) { Some(existing) => match existing { (Some(_), Some(v2)) => v2, - (Some(v1), None) if v1.parents == v.parents => v1, - (Some(_), sv2) => { - let allocated = alloc.alloc(v); - *sv2 = Some(allocated); - allocated + (Some(v1), sv2) => { + if v1.lhs_parents == v.lhs_parents && v1.rhs_parents == v.rhs_parents { + v1 + } else { + let allocated = alloc.alloc(v); + *sv2 = Some(allocated); + allocated + } } _ => unreachable!(), }, @@ -472,7 +416,9 @@ pub fn set_neighbours<'syn, 'b>( let mut res: Vec<(Edge, &Vertex)> = vec![]; if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { - if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&v.parents) { + if let Some((lhs_parent, rhs_parent, lhs_parents_next, rhs_parents_next)) = + try_pop_both(&v.lhs_parents, &v.rhs_parents) + { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -483,7 +429,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: next_sibling_syntax(rhs_parent), - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -494,7 +441,7 @@ pub fn set_neighbours<'syn, 'b>( } if v.lhs_syntax.is_id() { - if let Some((lhs_parent, parents_next)) = try_pop_lhs(&v.parents) { + if let Some((lhs_parent, lhs_parents_next)) = try_pop_either(&v.lhs_parents) { // Move to next after LHS parent. // Continue from sibling of parent. @@ -504,7 +451,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -515,7 +463,7 @@ pub fn set_neighbours<'syn, 'b>( } if v.rhs_syntax.is_id() { - if let Some((rhs_parent, parents_next)) = try_pop_rhs(&v.parents) { + if let Some((rhs_parent, rhs_parents_next)) = try_pop_either(&v.rhs_parents) { // Move to next after RHS parent. // Continue from sibling of parent. @@ -525,7 +473,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_parent), - parents: parents_next, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -548,7 +497,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -575,7 +525,8 @@ pub fn set_neighbours<'syn, 'b>( // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { // TODO: be consistent between parents_next and next_parents. - let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax); + let (lhs_parents_next, rhs_parents_next) = + push_both(&v.lhs_parents, &v.rhs_parents, lhs_syntax, rhs_syntax); let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -587,7 +538,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -621,7 +573,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -647,7 +600,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -657,8 +611,6 @@ pub fn set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax); - res.push(( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), @@ -667,7 +619,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, - parents: parents_next, + lhs_parents: push_either(&v.lhs_parents, lhs_syntax), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -691,7 +644,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -701,8 +655,6 @@ pub fn set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax); - res.push(( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), @@ -711,7 +663,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_child_syntax(rhs_syntax, children), - parents: parents_next, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: push_either(&v.rhs_parents, rhs_syntax), ..Vertex::default() }, alloc, diff --git a/src/diff/stack.rs b/src/diff/stack.rs index b366937a4e..58dad1ca9c 100644 --- a/src/diff/stack.rs +++ b/src/diff/stack.rs @@ -35,16 +35,16 @@ impl Stack { } } - // O(n) - pub fn size(&self) -> usize { - let mut res = 0; - let mut node = &self.head; - while let Some(next) = node { - res += 1; - node = &next.next; - } - res - } + // // O(n) + // pub fn size(&self) -> usize { + // let mut res = 0; + // let mut node = &self.head; + // while let Some(next) = node { + // res += 1; + // node = &next.next; + // } + // res + // } pub fn is_empty(&self) -> bool { self.head.is_none() From cb1c3e0ea38d31735b3f5433fe6582c137b22a2f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:08:48 +0800 Subject: [PATCH 13/58] Compress EnteredDelimiter --- src/diff/graph.rs | 58 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 21255ef77f..abe79003c9 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -141,8 +141,8 @@ impl<'a, 'b> Vertex<'a, 'b> { } fn can_pop_either_parent(&self) -> bool { - matches!(self.lhs_parents.peek(), Some(PopEither(_))) - || matches!(self.rhs_parents.peek(), Some(PopEither(_))) + matches!(self.lhs_parents.peek().map(Into::into), Some(PopEither(_))) + || matches!(self.rhs_parents.peek().map(Into::into), Some(PopEither(_))) } } @@ -195,7 +195,7 @@ impl Default for Vertex<'_, '_> { /// Tracks entering syntax List nodes. #[derive(Debug, Clone, PartialEq, Eq)] -enum EnteredDelimiter<'a> { +enum EnteredDelimiterEnum<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. /// @@ -211,7 +211,44 @@ enum EnteredDelimiter<'a> { PopBoth(&'a Syntax<'a>), } -use EnteredDelimiter::*; +use EnteredDelimiterEnum::*; + +/// Compress `EnteredDelimiterEnum` into a usize. +/// +/// Utilize the LSB as flag since `Syntax` is aligned. +/// +/// ```text +/// LSB = 0 -> PopEither +/// LSB = 1 -> PopBoth +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EnteredDelimiter<'a> { + data: usize, + phantom: PhantomData<&'a Syntax<'a>>, +} + +impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { + fn from(delim: &EnteredDelimiter<'a>) -> Self { + if delim.data & 1 == 0 { + PopEither(unsafe { transmute_copy(&delim.data) }) + } else { + PopBoth(unsafe { transmute_copy(&(delim.data ^ 1)) }) + } + } +} + +impl<'a> From<&EnteredDelimiterEnum<'a>> for EnteredDelimiter<'a> { + fn from(delim: &EnteredDelimiterEnum<'a>) -> Self { + let data = match *delim { + PopEither(s) => s as *const _ as usize, + PopBoth(s) => s as *const _ as usize | 1, + }; + EnteredDelimiter { + data, + phantom: PhantomData, + } + } +} fn push_both<'a>( lhs_entered: &Stack>, @@ -220,8 +257,8 @@ fn push_both<'a>( rhs_delim: &'a Syntax<'a>, ) -> (Stack>, Stack>) { ( - lhs_entered.push(PopBoth(lhs_delim)), - rhs_entered.push(PopBoth(rhs_delim)), + lhs_entered.push((&PopBoth(lhs_delim)).into()), + rhs_entered.push((&PopBoth(rhs_delim)).into()), ) } @@ -229,7 +266,7 @@ fn push_either<'a>( entered: &Stack>, delim: &'a Syntax<'a>, ) -> Stack> { - entered.push(PopEither(delim)) + entered.push((&PopEither(delim)).into()) } fn try_pop_both<'a>( @@ -241,7 +278,10 @@ fn try_pop_both<'a>( Stack>, Stack>, )> { - match (lhs_entered.peek(), rhs_entered.peek()) { + match ( + lhs_entered.peek().map(Into::into), + rhs_entered.peek().map(Into::into), + ) { (Some(PopBoth(lhs_delim)), Some(PopBoth(rhs_delim))) => Some(( lhs_delim, rhs_delim, @@ -255,7 +295,7 @@ fn try_pop_both<'a>( fn try_pop_either<'a>( entered: &Stack>, ) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { + match entered.peek().map(Into::into) { Some(PopEither(delim)) => Some((delim, entered.pop().unwrap())), _ => None, } From 4e450cb4c1cf88b03ac00ab26129fb7d3f6f4c48 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:23:28 +0800 Subject: [PATCH 14/58] Simplify set_neighbours by extracting a lambda --- src/diff/graph.rs | 229 ++++++++++++++++++++-------------------------- 1 file changed, 98 insertions(+), 131 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index abe79003c9..227aed8789 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -407,6 +407,7 @@ impl Edge { type SeenMap<'syn, 'b> = FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; +#[inline(never)] fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, @@ -453,7 +454,12 @@ pub fn set_neighbours<'syn, 'b>( return; } - let mut res: Vec<(Edge, &Vertex)> = vec![]; + let mut res = vec![]; + + let mut add_neighbor = std::convert::identity( + #[inline(always)] + |edge, vertex| res.push((edge, allocate_if_new(vertex, alloc, seen))), + ); if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { if let Some((lhs_parent, rhs_parent, lhs_parents_next, rhs_parents_next)) = @@ -463,20 +469,16 @@ pub fn set_neighbours<'syn, 'b>( // move up to the parent node. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterBoth, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -485,20 +487,16 @@ pub fn set_neighbours<'syn, 'b>( // Move to next after LHS parent. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterLHS, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: v.rhs_syntax, - lhs_parents: lhs_parents_next, - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: v.rhs_syntax, + lhs_parents: lhs_parents_next, + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } @@ -507,20 +505,16 @@ pub fn set_neighbours<'syn, 'b>( // Move to next after RHS parent. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterRHS, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -531,20 +525,16 @@ pub fn set_neighbours<'syn, 'b>( .unsigned_abs(); // Both nodes are equal, the happy case. - res.push(( + add_neighbor( UnchangedNode { depth_difference }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } if let ( @@ -572,20 +562,16 @@ pub fn set_neighbours<'syn, 'b>( - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - res.push(( + add_neighbor( EnterUnchangedDelimiter { depth_difference }, - allocate_if_new( - Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), - rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), + rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -607,20 +593,17 @@ pub fn set_neighbours<'syn, 'b>( if lhs_content != rhs_content { let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - res.push(( + + add_neighbor( ReplacedComment { levenshtein_pct }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } } @@ -629,44 +612,36 @@ pub fn set_neighbours<'syn, 'b>( match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { - res.push(( + add_neighbor( NovelAtomLHS { // TODO: should this apply if prev is a parent // node rather than a sibling? contiguous: lhs_syntax.prev_is_contiguous(), probably_punctuation: looks_like_punctuation(content), }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: v.rhs_syntax, - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: v.rhs_syntax, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - res.push(( + add_neighbor( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), }, - allocate_if_new( - Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, children), - rhs_syntax: v.rhs_syntax, - lhs_parents: push_either(&v.lhs_parents, lhs_syntax), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_child_syntax(lhs_syntax, children), + rhs_syntax: v.rhs_syntax, + lhs_parents: push_either(&v.lhs_parents, lhs_syntax), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } } @@ -675,42 +650,34 @@ pub fn set_neighbours<'syn, 'b>( match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { - res.push(( + add_neighbor( NovelAtomRHS { contiguous: rhs_syntax.prev_is_contiguous(), probably_punctuation: looks_like_punctuation(content), }, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - res.push(( + add_neighbor( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), }, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_child_syntax(rhs_syntax, children), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: push_either(&v.rhs_parents, rhs_syntax), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_child_syntax(rhs_syntax, children), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: push_either(&v.rhs_parents, rhs_syntax), + ..Vertex::default() + }, + ); } } } From b0ab6c899a369cab73d79304002563fe08bae10d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:30:20 +0800 Subject: [PATCH 15/58] Reserve vec capacity --- src/diff/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 227aed8789..cb26973a2e 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -454,7 +454,7 @@ pub fn set_neighbours<'syn, 'b>( return; } - let mut res = vec![]; + let mut res = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( #[inline(always)] From 5043858b87229da889e20a8923357457fb32c766 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 22:33:43 +0800 Subject: [PATCH 16/58] Remove transmute_copy --- src/diff/graph.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index cb26973a2e..c34e58999f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -7,7 +7,6 @@ use std::{ cmp::min, hash::{Hash, Hasher}, marker::PhantomData, - mem::transmute_copy, }; use strsim::normalized_levenshtein; @@ -45,7 +44,7 @@ impl<'a> SyntaxRefOrId<'a> { pub fn get_ref(&self) -> Option<&'a Syntax<'a>> { if self.is_ref() { - Some(unsafe { transmute_copy(&self.data) }) + Some(unsafe { &*(self.data as *const _) }) } else { None } @@ -230,9 +229,9 @@ pub struct EnteredDelimiter<'a> { impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { fn from(delim: &EnteredDelimiter<'a>) -> Self { if delim.data & 1 == 0 { - PopEither(unsafe { transmute_copy(&delim.data) }) + PopEither(unsafe { &*(delim.data as *const _) }) } else { - PopBoth(unsafe { transmute_copy(&(delim.data ^ 1)) }) + PopBoth(unsafe { &*((delim.data ^ 1) as *const _) }) } } } From 8327c99e7ba93af8d52c030f90bc873fa2b2adf2 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 00:47:13 +0800 Subject: [PATCH 17/58] Compress seen map --- Cargo.lock | 40 +++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/diff/dijkstra.rs | 5 ++--- src/diff/graph.rs | 37 +++++++++++++++++++------------------ 4 files changed, 61 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 794b75ed02..7f45c53f2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -183,6 +194,7 @@ dependencies = [ "cc", "clap", "const_format", + "hashbrown 0.12.3", "itertools", "lazy_static", "libc", @@ -260,12 +272,32 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -291,7 +323,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.11.2", ] [[package]] @@ -690,6 +722,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index b3db13bf4a..e64d630edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ clap = { version = "3.1.8", features = ["cargo", "env", "wrap_help"] } itertools = "0.10.1" typed-arena = "2.0.1" rustc-hash = "1.1.0" +hashbrown = "0.12.3" strsim = "0.10.0" lazy_static = "1.4.0" atty = "0.2.14" diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 2c2b911968..2eb95913ff 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,13 +5,12 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, + diff::graph::{populate_change_map, set_neighbours, Edge, SeenMap, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; use itertools::Itertools; use radix_heap::RadixHeapMap; -use rustc_hash::FxHashMap; #[derive(Debug)] pub struct ExceededGraphLimit {} @@ -30,7 +29,7 @@ fn shortest_vertex_path<'a, 'b>( heap.push(Reverse(0), start); - let mut seen = FxHashMap::default(); + let mut seen = SeenMap::default(); seen.reserve(size_hint); let end: &'b Vertex<'a, 'b> = loop { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index c34e58999f..bf3c4a4cd3 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -1,11 +1,11 @@ //! A graph representation for computing tree diffs. use bumpalo::Bump; -use rustc_hash::FxHashMap; +use rustc_hash::FxHasher; use std::{ cell::{Cell, UnsafeCell}, cmp::min, - hash::{Hash, Hasher}, + hash::{BuildHasherDefault, Hash, Hasher}, marker::PhantomData, }; use strsim::normalized_levenshtein; @@ -403,8 +403,11 @@ impl Edge { } } -type SeenMap<'syn, 'b> = - FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; +pub type SeenMap<'syn, 'b> = hashbrown::HashMap< + &'b Vertex<'syn, 'b>, + Option<&'b Vertex<'syn, 'b>>, + BuildHasherDefault, +>; #[inline(never)] fn allocate_if_new<'syn, 'b>( @@ -412,23 +415,21 @@ fn allocate_if_new<'syn, 'b>( alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, ) -> &'b Vertex<'syn, 'b> { - match seen.get_mut(&v) { - Some(existing) => match existing { - (Some(_), Some(v2)) => v2, - (Some(v1), sv2) => { - if v1.lhs_parents == v.lhs_parents && v1.rhs_parents == v.rhs_parents { - v1 - } else { - let allocated = alloc.alloc(v); - *sv2 = Some(allocated); - allocated - } + match seen.get_key_value_mut(&v) { + Some((key, value)) => { + if let Some(existing) = value { + return existing; + } + if key.lhs_parents == v.lhs_parents && key.rhs_parents == v.rhs_parents { + return key; } - _ => unreachable!(), - }, + let allocated = alloc.alloc(v); + *value = Some(allocated); + allocated + } None => { let allocated = alloc.alloc(v); - seen.insert(allocated, (Some(allocated), None)); + seen.insert(allocated, None); allocated } } From 3612d0844af5928416c2237e57a0c9ad000a0ceb Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 01:14:02 +0800 Subject: [PATCH 18/58] Skip visited vertices --- src/diff/dijkstra.rs | 4 +++- src/diff/graph.rs | 12 ++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 2eb95913ff..0b517b4ec1 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -38,7 +38,9 @@ fn shortest_vertex_path<'a, 'b>( if current.is_end() { break current; } - + if unsafe { (&*current.neighbours.get()).is_some() } { + continue; // visited + } set_neighbours(current, vertex_arena, &mut seen); if let Some(neighbors) = unsafe { &*current.neighbours.get() } { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bf3c4a4cd3..1cd6973187 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -450,15 +450,11 @@ pub fn set_neighbours<'syn, 'b>( alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, ) { - if unsafe { (&*v.neighbours.get()).is_some() } { - return; - } - - let mut res = Vec::with_capacity(4); + let mut neighbors = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( #[inline(always)] - |edge, vertex| res.push((edge, allocate_if_new(vertex, alloc, seen))), + |edge, vertex| neighbors.push((edge, allocate_if_new(vertex, alloc, seen))), ); if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { @@ -682,11 +678,11 @@ pub fn set_neighbours<'syn, 'b>( } } assert!( - !res.is_empty(), + !neighbors.is_empty(), "Must always find some next steps if node is not the end" ); - unsafe { *v.neighbours.get() = Some(res) }; + unsafe { *v.neighbours.get() = Some(neighbors) }; } pub fn populate_change_map<'a, 'b>( From 9e11a2213ff9168500389145f34a6784dea9e89b Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 02:21:38 +0800 Subject: [PATCH 19/58] Refactor shortest path algorithm --- src/diff/dijkstra.rs | 89 +++++++++----------------------------------- src/diff/graph.rs | 15 ++++---- 2 files changed, 25 insertions(+), 79 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 0b517b4ec1..00bdec4f0a 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,7 +5,7 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{populate_change_map, set_neighbours, Edge, SeenMap, Vertex}, + diff::graph::{get_neighbours, populate_change_map, Edge, SeenMap, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; @@ -21,7 +21,7 @@ fn shortest_vertex_path<'a, 'b>( vertex_arena: &'b Bump, size_hint: usize, graph_limit: usize, -) -> Result>, ExceededGraphLimit> { +) -> Result)>, ExceededGraphLimit> { // We want to visit nodes with the shortest distance first, but // RadixHeapMap is a max-heap. Ensure nodes are wrapped with // Reverse to flip comparisons. @@ -35,23 +35,22 @@ fn shortest_vertex_path<'a, 'b>( let end: &'b Vertex<'a, 'b> = loop { match heap.pop() { Some((Reverse(distance), current)) => { + if current.is_visited.get() { + continue; + } + current.is_visited.set(true); + if current.is_end() { break current; } - if unsafe { (&*current.neighbours.get()).is_some() } { - continue; // visited - } - set_neighbours(current, vertex_arena, &mut seen); - if let Some(neighbors) = unsafe { &*current.neighbours.get() } { - for &(edge, next) in neighbors { - let distance_to_next = distance + edge.cost(); + for (edge, next) in get_neighbours(current, vertex_arena, &mut seen) { + let distance_to_next = distance + edge.cost(); - if distance_to_next < next.shortest_distance.get() { - next.shortest_distance.set(distance_to_next); - next.predecessor.replace(Some(current)); - heap.push(Reverse(distance_to_next), next); - } + if distance_to_next < next.shortest_distance.get() { + next.shortest_distance.set(distance_to_next); + next.predecessor.set(Some((edge, current))); + heap.push(Reverse(distance_to_next), next); } } @@ -70,10 +69,10 @@ fn shortest_vertex_path<'a, 'b>( heap.len(), ); - let mut current = Some(end); - let mut vertex_route: Vec<&'b Vertex<'a, 'b>> = vec![]; - while let Some(node) = current { - vertex_route.push(node); + let mut current = end.predecessor.get(); + let mut vertex_route = vec![]; + while let Some((edge, node)) = current { + vertex_route.push((edge, node)); current = node.predecessor.get(); } @@ -81,26 +80,6 @@ fn shortest_vertex_path<'a, 'b>( Ok(vertex_route) } -fn shortest_path_with_edges<'a, 'b>( - route: &[&'b Vertex<'a, 'b>], -) -> Vec<(Edge, &'b Vertex<'a, 'b>)> { - let mut prev = route.first().expect("Expected non-empty route"); - - let mut cost = 0; - let mut res = vec![]; - - for vertex in route.iter().skip(1) { - let edge = edge_between(prev, vertex); - res.push((edge, *prev)); - cost += edge.cost(); - - prev = vertex; - } - debug!("Found a path of {} with cost {}.", route.len(), cost); - - res -} - /// Return the shortest route from the `start` to the end vertex. /// /// The vec returned does not return the very last vertex. This is @@ -112,39 +91,7 @@ fn shortest_path<'a, 'b>( graph_limit: usize, ) -> Result)>, ExceededGraphLimit> { let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start); - let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; - Ok(shortest_path_with_edges(&vertex_path)) -} - -fn edge_between<'a, 'b>(before: &Vertex<'a, 'b>, after: &Vertex<'a, 'b>) -> Edge { - assert_ne!(before, after); - - let mut shortest_edge: Option = None; - if let Some(neighbours) = unsafe { &*before.neighbours.get() } { - for &(edge, next) in neighbours { - // If there are multiple edges that can take us to `next`, - // prefer the shortest. - if *next == *after { - let is_shorter = match shortest_edge { - Some(prev_edge) => edge.cost() < prev_edge.cost(), - None => true, - }; - - if is_shorter { - shortest_edge = Some(edge); - } - } - } - } - - if let Some(edge) = shortest_edge { - return edge; - } - - panic!( - "Expected a route between the two vertices {:#?} and {:#?}", - before, after - ); + shortest_vertex_path(start, vertex_arena, size_hint, graph_limit) } /// What is the total number of AST nodes? diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 1cd6973187..a43b8b0432 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -3,7 +3,7 @@ use bumpalo::Bump; use rustc_hash::FxHasher; use std::{ - cell::{Cell, UnsafeCell}, + cell::Cell, cmp::min, hash::{BuildHasherDefault, Hash, Hasher}, marker::PhantomData, @@ -109,10 +109,9 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> /// ``` #[derive(Debug)] pub struct Vertex<'a, 'b> { - // TODO: how to design the boundary of unsafe? - pub neighbours: UnsafeCell)>>>, + pub is_visited: Cell, pub shortest_distance: Cell, - pub predecessor: Cell>>, + pub predecessor: Cell)>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, lhs_parents: Stack>, @@ -129,7 +128,7 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { Vertex { - neighbours: UnsafeCell::new(None), + is_visited: Cell::new(false), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), @@ -445,11 +444,11 @@ fn looks_like_punctuation(content: &str) -> bool { /// Compute the neighbours of `v` if we haven't previously done so, /// write them to the .neighbours cell inside `v`, and return them. -pub fn set_neighbours<'syn, 'b>( +pub fn get_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, -) { +) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { let mut neighbors = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( @@ -682,7 +681,7 @@ pub fn set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - unsafe { *v.neighbours.get() = Some(neighbors) }; + neighbors } pub fn populate_change_map<'a, 'b>( From a47000f10262b1a1afa2f4dfafc88f074ed9aa31 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 20:23:38 +0800 Subject: [PATCH 20/58] Skip visited vertices when searching neighbors --- src/diff/graph.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index a43b8b0432..6eb5d360e8 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -453,7 +453,12 @@ pub fn get_neighbours<'syn, 'b>( let mut add_neighbor = std::convert::identity( #[inline(always)] - |edge, vertex| neighbors.push((edge, allocate_if_new(vertex, alloc, seen))), + |edge, vertex| { + let neighbor = allocate_if_new(vertex, alloc, seen); + if !neighbor.is_visited.get() { + neighbors.push((edge, neighbor)); + } + }, ); if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { @@ -676,10 +681,6 @@ pub fn get_neighbours<'syn, 'b>( } } } - assert!( - !neighbors.is_empty(), - "Must always find some next steps if node is not the end" - ); neighbors } From 88ba693a8f16a22925bf4b54b289d489bdb5d268 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 20:47:26 +0800 Subject: [PATCH 21/58] Simplify code --- src/diff/dijkstra.rs | 25 ++---- src/diff/graph.rs | 181 +++++++++++++++---------------------------- 2 files changed, 69 insertions(+), 137 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 00bdec4f0a..3a47a04782 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -15,9 +15,12 @@ use radix_heap::RadixHeapMap; #[derive(Debug)] pub struct ExceededGraphLimit {} -/// Return the shortest route from `start` to the end vertex. -fn shortest_vertex_path<'a, 'b>( - start: &'b Vertex<'a, 'b>, +/// Return the shortest route from the `start` to the end vertex. +/// +/// The vec returned does not return the very last vertex. This is +/// necessary because a route of N vertices only has N-1 edges. +fn shortest_path<'a, 'b>( + start: Vertex<'a, 'b>, vertex_arena: &'b Bump, size_hint: usize, graph_limit: usize, @@ -27,7 +30,7 @@ fn shortest_vertex_path<'a, 'b>( // Reverse to flip comparisons. let mut heap: RadixHeapMap, &'b Vertex<'a, 'b>> = RadixHeapMap::new(); - heap.push(Reverse(0), start); + heap.push(Reverse(0), vertex_arena.alloc(start)); let mut seen = SeenMap::default(); seen.reserve(size_hint); @@ -80,20 +83,6 @@ fn shortest_vertex_path<'a, 'b>( Ok(vertex_route) } -/// Return the shortest route from the `start` to the end vertex. -/// -/// The vec returned does not return the very last vertex. This is -/// necessary because a route of N vertices only has N-1 edges. -fn shortest_path<'a, 'b>( - start: Vertex<'a, 'b>, - vertex_arena: &'b Bump, - size_hint: usize, - graph_limit: usize, -) -> Result)>, ExceededGraphLimit> { - let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start); - shortest_vertex_path(start, vertex_arena, size_hint, graph_limit) -} - /// What is the total number of AST nodes? fn node_count(root: Option<&Syntax>) -> u32 { let mut node = root; diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 6eb5d360e8..ef7353b543 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -21,7 +21,7 @@ use Edge::*; /// Compress `&Syntax` and `SyntaxId` into a usize. /// -/// Utilize the LSB as flag since `Syntax` is aligned. +/// Utilize the LSB as flag since `&Syntax` is aligned. /// /// ```text /// LSB = 0 -> &Syntax @@ -196,8 +196,6 @@ impl Default for Vertex<'_, '_> { enum EnteredDelimiterEnum<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. - /// - /// Assumes that at least one stack is non-empty. PopEither(&'a Syntax<'a>), /// If we've entered the LHS and RHS together, we must pop both /// sides together too. Otherwise we'd consider the following case to have no changes. @@ -213,7 +211,7 @@ use EnteredDelimiterEnum::*; /// Compress `EnteredDelimiterEnum` into a usize. /// -/// Utilize the LSB as flag since `Syntax` is aligned. +/// Utilize the LSB as flag since `&Syntax` is aligned. /// /// ```text /// LSB = 0 -> PopEither @@ -235,9 +233,9 @@ impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { } } -impl<'a> From<&EnteredDelimiterEnum<'a>> for EnteredDelimiter<'a> { - fn from(delim: &EnteredDelimiterEnum<'a>) -> Self { - let data = match *delim { +impl<'a> From> for EnteredDelimiter<'a> { + fn from(delim: EnteredDelimiterEnum<'a>) -> Self { + let data = match delim { PopEither(s) => s as *const _ as usize, PopBoth(s) => s as *const _ as usize | 1, }; @@ -248,57 +246,6 @@ impl<'a> From<&EnteredDelimiterEnum<'a>> for EnteredDelimiter<'a> { } } -fn push_both<'a>( - lhs_entered: &Stack>, - rhs_entered: &Stack>, - lhs_delim: &'a Syntax<'a>, - rhs_delim: &'a Syntax<'a>, -) -> (Stack>, Stack>) { - ( - lhs_entered.push((&PopBoth(lhs_delim)).into()), - rhs_entered.push((&PopBoth(rhs_delim)).into()), - ) -} - -fn push_either<'a>( - entered: &Stack>, - delim: &'a Syntax<'a>, -) -> Stack> { - entered.push((&PopEither(delim)).into()) -} - -fn try_pop_both<'a>( - lhs_entered: &Stack>, - rhs_entered: &Stack>, -) -> Option<( - &'a Syntax<'a>, - &'a Syntax<'a>, - Stack>, - Stack>, -)> { - match ( - lhs_entered.peek().map(Into::into), - rhs_entered.peek().map(Into::into), - ) { - (Some(PopBoth(lhs_delim)), Some(PopBoth(rhs_delim))) => Some(( - lhs_delim, - rhs_delim, - lhs_entered.pop().unwrap(), - rhs_entered.pop().unwrap(), - )), - _ => None, - } -} - -fn try_pop_either<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek().map(Into::into) { - Some(PopEither(delim)) => Some((delim, entered.pop().unwrap())), - _ => None, - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -442,8 +389,7 @@ fn looks_like_punctuation(content: &str) -> bool { content == "," || content == ";" || content == "." } -/// Compute the neighbours of `v` if we haven't previously done so, -/// write them to the .neighbours cell inside `v`, and return them. +/// Compute the neighbours of `v`. pub fn get_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, @@ -461,64 +407,63 @@ pub fn get_neighbours<'syn, 'b>( }, ); - if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { - if let Some((lhs_parent, rhs_parent, lhs_parents_next, rhs_parents_next)) = - try_pop_both(&v.lhs_parents, &v.rhs_parents) - { - // We have exhausted all the nodes on both lists, so we can - // move up to the parent node. + let v_info = ( + v.lhs_syntax.get_ref(), + v.rhs_syntax.get_ref(), + v.lhs_parents.peek().map(Into::into), + v.rhs_parents.peek().map(Into::into), + ); - // Continue from sibling of parent. - add_neighbor( - ExitDelimiterBoth, - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - ); - } + if let (None, None, Some(PopBoth(lhs_parent)), Some(PopBoth(rhs_parent))) = v_info { + // We have exhausted all the nodes on both lists, so we can + // move up to the parent node. + + // Continue from sibling of parent. + add_neighbor( + ExitDelimiterBoth, + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: v.lhs_parents.pop().unwrap(), + rhs_parents: v.rhs_parents.pop().unwrap(), + ..Vertex::default() + }, + ); } - if v.lhs_syntax.is_id() { - if let Some((lhs_parent, lhs_parents_next)) = try_pop_either(&v.lhs_parents) { - // Move to next after LHS parent. - - // Continue from sibling of parent. - add_neighbor( - ExitDelimiterLHS, - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: v.rhs_syntax, - lhs_parents: lhs_parents_next, - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - ); - } + if let (None, _, Some(PopEither(lhs_parent)), _) = v_info { + // Move to next after LHS parent. + + // Continue from sibling of parent. + add_neighbor( + ExitDelimiterLHS, + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: v.rhs_syntax, + lhs_parents: v.lhs_parents.pop().unwrap(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } - if v.rhs_syntax.is_id() { - if let Some((rhs_parent, rhs_parents_next)) = try_pop_either(&v.rhs_parents) { - // Move to next after RHS parent. - - // Continue from sibling of parent. - add_neighbor( - ExitDelimiterRHS, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - ); - } + if let (_, None, _, Some(PopEither(rhs_parent))) = v_info { + // Move to next after RHS parent. + + // Continue from sibling of parent. + add_neighbor( + ExitDelimiterRHS, + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.pop().unwrap(), + ..Vertex::default() + }, + ); } - if let (Some(lhs_syntax), Some(rhs_syntax)) = (v.lhs_syntax.get_ref(), v.rhs_syntax.get_ref()) { + if let (Some(lhs_syntax), Some(rhs_syntax), _, _) = v_info { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -555,8 +500,6 @@ pub fn get_neighbours<'syn, 'b>( // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { // TODO: be consistent between parents_next and next_parents. - let (lhs_parents_next, rhs_parents_next) = - push_both(&v.lhs_parents, &v.rhs_parents, lhs_syntax, rhs_syntax); let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -567,8 +510,8 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, + lhs_parents: v.lhs_parents.push(PopBoth(lhs_syntax).into()), + rhs_parents: v.rhs_parents.push(PopBoth(rhs_syntax).into()), ..Vertex::default() }, ); @@ -608,7 +551,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let Some(lhs_syntax) = v.lhs_syntax.get_ref() { + if let (Some(lhs_syntax), _, _, _) = v_info { match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -637,7 +580,7 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, - lhs_parents: push_either(&v.lhs_parents, lhs_syntax), + lhs_parents: v.lhs_parents.push(PopEither(lhs_syntax).into()), rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, @@ -646,7 +589,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let Some(rhs_syntax) = v.rhs_syntax.get_ref() { + if let (_, Some(rhs_syntax), _, _) = v_info { match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -674,7 +617,7 @@ pub fn get_neighbours<'syn, 'b>( lhs_syntax: v.lhs_syntax, rhs_syntax: next_child_syntax(rhs_syntax, children), lhs_parents: v.lhs_parents.clone(), - rhs_parents: push_either(&v.rhs_parents, rhs_syntax), + rhs_parents: v.rhs_parents.push(PopEither(rhs_syntax).into()), ..Vertex::default() }, ); From 5e5eef231d2b3a65e303d16c47964e7fcd38322a Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 07:30:38 +0800 Subject: [PATCH 22/58] Compress stack to bitmap --- src/diff/dijkstra.rs | 4 +- src/diff/graph.rs | 342 +++++++++++++++++++++++++------------------ src/diff/mod.rs | 1 - src/diff/stack.rs | 52 ------- 4 files changed, 198 insertions(+), 201 deletions(-) delete mode 100644 src/diff/stack.rs diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 3a47a04782..a898403a7c 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -155,10 +155,10 @@ pub fn mark_syntax<'a>( format!( "{:20} {:20} --- {:3} {:?}", x.1.lhs_syntax - .get_ref() + .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), x.1.rhs_syntax - .get_ref() + .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), x.0.cost(), x.0, diff --git a/src/diff/graph.rs b/src/diff/graph.rs index ef7353b543..17e65670c5 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -11,72 +11,117 @@ use std::{ use strsim::normalized_levenshtein; use crate::{ - diff::{ - changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, - stack::Stack, - }, - parse::syntax::{AtomKind, Syntax, SyntaxId}, + diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, + parse::syntax::{AtomKind, Syntax}, }; use Edge::*; -/// Compress `&Syntax` and `SyntaxId` into a usize. +/// Compress two `&Syntax` into a usize. /// /// Utilize the LSB as flag since `&Syntax` is aligned. /// /// ```text -/// LSB = 0 -> &Syntax -/// LSB = 1 -> SyntaxId * 2 + 1 +/// LSB = 0 -> side syntax +/// LSB = 1 -> optional parent syntax (side syntax is None) /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct SyntaxRefOrId<'a> { +pub struct SideSyntax<'a> { data: usize, phantom: PhantomData<&'a Syntax<'a>>, } -impl<'a> SyntaxRefOrId<'a> { - pub fn is_ref(&self) -> bool { +impl<'a> SideSyntax<'a> { + pub fn is_side(&self) -> bool { self.data & 1 == 0 } - pub fn is_id(&self) -> bool { - self.data & 1 == 1 - } - - pub fn get_ref(&self) -> Option<&'a Syntax<'a>> { - if self.is_ref() { + pub fn get_side(&self) -> Option<&'a Syntax<'a>> { + if self.is_side() { Some(unsafe { &*(self.data as *const _) }) } else { None } } -} -impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { - fn from(s: &'a Syntax<'a>) -> Self { + pub fn parent(&self) -> Option<&'a Syntax<'a>> { + match self.get_side() { + Some(side) => side.parent(), + None => match self.data ^ 1 { + 0 => None, + d => Some(unsafe { &*(d as *const _) }), + }, + } + } + + pub fn from_side(side: &'a Syntax<'a>) -> Self { Self { - data: s as *const _ as _, + data: side as *const _ as _, phantom: PhantomData, } } -} -impl From> for SyntaxRefOrId<'_> { - fn from(s: Option) -> Self { + pub fn from_parent(parent: Option<&'a Syntax<'a>>) -> Self { + let data = match parent { + Some(p) => p as *const _ as usize | 1, + None => 1, + }; Self { - data: s.map_or(0, |n| n.get() as usize) * 2 + 1, + data, phantom: PhantomData, } } } -fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { - let parent_id = syntax.parent().map(Syntax::id); - syntax.next_sibling().map_or(parent_id.into(), |n| n.into()) +fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { + let parent = SideSyntax::from_parent(syntax.parent()); + syntax.next_sibling().map_or(parent, SideSyntax::from_side) +} + +fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { + let parent = SideSyntax::from_parent(Some(syntax)); + children + .get(0) + .copied() + .map_or(parent, SideSyntax::from_side) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u64)] +enum EnteredDelimiter { + /// If we've entered the LHS or RHS separately, we can pop either + /// side independently. + PopEither = 0, + /// If we've entered the LHS and RHS together, we must pop both + /// sides together too. Otherwise we'd consider the following case to have no changes. + /// + /// ```text + /// Old: (a b c) + /// New: (a b) c + /// ``` + PopBoth = 1, } -fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SyntaxRefOrId<'a> { - let child = children.get(0).copied(); - child.map_or(Some(syntax.id()).into(), |s| s.into()) +use EnteredDelimiter::*; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct BitStack(u64); + +impl BitStack { + fn peek(&self) -> EnteredDelimiter { + if self.0 & 1 == PopEither as u64 { + PopEither + } else { + PopBoth + } + } + + fn push(&self, tag: EnteredDelimiter) -> Self { + Self((self.0 << 1) | tag as u64) + } + + fn pop(&self) -> Self { + Self(self.0 >> 1) + } } /// A vertex in a directed acyclic graph that represents a diff. @@ -112,18 +157,20 @@ pub struct Vertex<'a, 'b> { pub is_visited: Cell, pub shortest_distance: Cell, pub predecessor: Cell)>>, - pub lhs_syntax: SyntaxRefOrId<'a>, - pub rhs_syntax: SyntaxRefOrId<'a>, - lhs_parents: Stack>, - rhs_parents: Stack>, + pub lhs_syntax: SideSyntax<'a>, + pub rhs_syntax: SideSyntax<'a>, + lhs_parent_num: u8, + rhs_parent_num: u8, + lhs_parent_stack: BitStack, + rhs_parent_stack: BitStack, } impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - self.lhs_syntax.is_id() - && self.rhs_syntax.is_id() - && self.lhs_parents.is_empty() - && self.rhs_parents.is_empty() + !self.lhs_syntax.is_side() + && !self.rhs_syntax.is_side() + && self.lhs_parent_num == 0 + && self.rhs_parent_num == 0 } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { @@ -131,16 +178,55 @@ impl<'a, 'b> Vertex<'a, 'b> { is_visited: Cell::new(false), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), - rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), - lhs_parents: Stack::new(), - rhs_parents: Stack::new(), + lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), + rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), + lhs_parent_num: 0, + rhs_parent_num: 0, + lhs_parent_stack: BitStack(0), + rhs_parent_stack: BitStack(0), + } + } + + fn parents_eq(&self, other: &Vertex) -> bool { + fn parents_recur_eq(mut a: Option<&Syntax>, mut b: Option<&Syntax>) -> bool { + while let (Some(pa), Some(pb)) = (a, b) { + if pa.content_id() != pb.content_id() { + return false; + } + a = pa.parent(); + b = pb.parent(); + } + true } + + self.lhs_parent_num == other.lhs_parent_num + && self.rhs_parent_num == other.rhs_parent_num + && self.lhs_parent_stack == other.lhs_parent_stack + && self.rhs_parent_stack == other.rhs_parent_stack + && parents_recur_eq(self.lhs_syntax.parent(), other.lhs_syntax.parent()) + && parents_recur_eq(self.rhs_syntax.parent(), other.rhs_syntax.parent()) + } + + fn try_pop_tag(&self) -> (Option, Option) { + ( + if self.lhs_parent_num > 0 { + Some(self.lhs_parent_stack.peek()) + } else { + None + }, + if self.rhs_parent_num > 0 { + Some(self.rhs_parent_stack.peek()) + } else { + None + }, + ) } - fn can_pop_either_parent(&self) -> bool { - matches!(self.lhs_parents.peek().map(Into::into), Some(PopEither(_))) - || matches!(self.rhs_parents.peek().map(Into::into), Some(PopEither(_))) + fn can_pop_either(&self) -> bool { + matches!( + self.try_pop_tag(), + (Some(PopEither), _) | (_, Some(PopEither)) + ) } } @@ -171,7 +257,7 @@ impl PartialEq for Vertex<'_, '_> { self.lhs_syntax == other.lhs_syntax && self.rhs_syntax == other.rhs_syntax - && self.can_pop_either_parent() == other.can_pop_either_parent() + && self.can_pop_either() == other.can_pop_either() } } @@ -181,7 +267,7 @@ impl Hash for Vertex<'_, '_> { fn hash(&self, state: &mut H) { self.lhs_syntax.hash(state); self.rhs_syntax.hash(state); - self.can_pop_either_parent().hash(state); + self.can_pop_either().hash(state); } } @@ -191,61 +277,6 @@ impl Default for Vertex<'_, '_> { } } -/// Tracks entering syntax List nodes. -#[derive(Debug, Clone, PartialEq, Eq)] -enum EnteredDelimiterEnum<'a> { - /// If we've entered the LHS or RHS separately, we can pop either - /// side independently. - PopEither(&'a Syntax<'a>), - /// If we've entered the LHS and RHS together, we must pop both - /// sides together too. Otherwise we'd consider the following case to have no changes. - /// - /// ```text - /// Old: (a b c) - /// New: (a b) c - /// ``` - PopBoth(&'a Syntax<'a>), -} - -use EnteredDelimiterEnum::*; - -/// Compress `EnteredDelimiterEnum` into a usize. -/// -/// Utilize the LSB as flag since `&Syntax` is aligned. -/// -/// ```text -/// LSB = 0 -> PopEither -/// LSB = 1 -> PopBoth -/// ``` -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct EnteredDelimiter<'a> { - data: usize, - phantom: PhantomData<&'a Syntax<'a>>, -} - -impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { - fn from(delim: &EnteredDelimiter<'a>) -> Self { - if delim.data & 1 == 0 { - PopEither(unsafe { &*(delim.data as *const _) }) - } else { - PopBoth(unsafe { &*((delim.data ^ 1) as *const _) }) - } - } -} - -impl<'a> From> for EnteredDelimiter<'a> { - fn from(delim: EnteredDelimiterEnum<'a>) -> Self { - let data = match delim { - PopEither(s) => s as *const _ as usize, - PopBoth(s) => s as *const _ as usize | 1, - }; - EnteredDelimiter { - data, - phantom: PhantomData, - } - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -366,7 +397,7 @@ fn allocate_if_new<'syn, 'b>( if let Some(existing) = value { return existing; } - if key.lhs_parents == v.lhs_parents && key.rhs_parents == v.rhs_parents { + if key.parents_eq(&v) { return key; } let allocated = alloc.alloc(v); @@ -408,13 +439,12 @@ pub fn get_neighbours<'syn, 'b>( ); let v_info = ( - v.lhs_syntax.get_ref(), - v.rhs_syntax.get_ref(), - v.lhs_parents.peek().map(Into::into), - v.rhs_parents.peek().map(Into::into), + v.lhs_syntax.get_side(), + v.rhs_syntax.get_side(), + v.try_pop_tag(), ); - if let (None, None, Some(PopBoth(lhs_parent)), Some(PopBoth(rhs_parent))) = v_info { + if let (None, None, (Some(PopBoth), Some(PopBoth))) = v_info { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -422,32 +452,36 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( ExitDelimiterBoth, Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: v.lhs_parents.pop().unwrap(), - rhs_parents: v.rhs_parents.pop().unwrap(), + lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), + rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), + lhs_parent_num: v.lhs_parent_num - 1, + rhs_parent_num: v.rhs_parent_num - 1, + lhs_parent_stack: v.lhs_parent_stack.pop(), + rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() }, ); } - if let (None, _, Some(PopEither(lhs_parent)), _) = v_info { + if let (None, _, (Some(PopEither), _)) = v_info { // Move to next after LHS parent. // Continue from sibling of parent. add_neighbor( ExitDelimiterLHS, Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), + lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), rhs_syntax: v.rhs_syntax, - lhs_parents: v.lhs_parents.pop().unwrap(), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num - 1, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack.pop(), + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); } - if let (_, None, _, Some(PopEither(rhs_parent))) = v_info { + if let (_, None, (_, Some(PopEither))) = v_info { // Move to next after RHS parent. // Continue from sibling of parent. @@ -455,15 +489,17 @@ pub fn get_neighbours<'syn, 'b>( ExitDelimiterRHS, Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.pop().unwrap(), + rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num - 1, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() }, ); } - if let (Some(lhs_syntax), Some(rhs_syntax), _, _) = v_info { + if let (Some(lhs_syntax), Some(rhs_syntax), _) = v_info { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -475,8 +511,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); @@ -510,8 +548,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - lhs_parents: v.lhs_parents.push(PopBoth(lhs_syntax).into()), - rhs_parents: v.rhs_parents.push(PopBoth(rhs_syntax).into()), + lhs_parent_num: v.lhs_parent_num + 1, + rhs_parent_num: v.rhs_parent_num + 1, + lhs_parent_stack: v.lhs_parent_stack.push(PopBoth), + rhs_parent_stack: v.rhs_parent_stack.push(PopBoth), ..Vertex::default() }, ); @@ -542,8 +582,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); @@ -551,7 +593,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let (Some(lhs_syntax), _, _, _) = v_info { + if let (Some(lhs_syntax), _, _) = v_info { match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -565,8 +607,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); @@ -580,8 +624,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, - lhs_parents: v.lhs_parents.push(PopEither(lhs_syntax).into()), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num + 1, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack.push(PopEither), + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); @@ -589,7 +635,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let (_, Some(rhs_syntax), _, _) = v_info { + if let (_, Some(rhs_syntax), _) = v_info { match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -601,8 +647,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, ); @@ -616,8 +664,10 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_child_syntax(rhs_syntax, children), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.push(PopEither(rhs_syntax).into()), + lhs_parent_num: v.lhs_parent_num, + rhs_parent_num: v.rhs_parent_num + 1, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack.push(PopEither), ..Vertex::default() }, ); @@ -639,8 +689,8 @@ pub fn populate_change_map<'a, 'b>( } UnchangedNode { .. } => { // No change on this node or its children. - let lhs = v.lhs_syntax.get_ref().unwrap(); - let rhs = v.rhs_syntax.get_ref().unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); insert_deep_unchanged(rhs, lhs, change_map); @@ -648,14 +698,14 @@ pub fn populate_change_map<'a, 'b>( EnterUnchangedDelimiter { .. } => { // No change on the outer delimiter, but children may // have changed. - let lhs = v.lhs_syntax.get_ref().unwrap(); - let rhs = v.rhs_syntax.get_ref().unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } ReplacedComment { levenshtein_pct } => { - let lhs = v.lhs_syntax.get_ref().unwrap(); - let rhs = v.rhs_syntax.get_ref().unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); if *levenshtein_pct > 40 { change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); @@ -666,11 +716,11 @@ pub fn populate_change_map<'a, 'b>( } } NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => { - let lhs = v.lhs_syntax.get_ref().unwrap(); + let lhs = v.lhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => { - let rhs = v.rhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } } diff --git a/src/diff/mod.rs b/src/diff/mod.rs index 4bd7efd03d..44f10e6c78 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -3,5 +3,4 @@ pub mod dijkstra; mod graph; pub mod myers_diff; pub mod sliders; -mod stack; pub mod unchanged; diff --git a/src/diff/stack.rs b/src/diff/stack.rs deleted file mode 100644 index 58dad1ca9c..0000000000 --- a/src/diff/stack.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::rc::Rc; - -#[derive(Debug, Clone, Default, PartialEq, Eq)] -struct Node { - val: T, - next: Option>>, -} - -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct Stack { - head: Option>>, -} - -impl Stack { - pub fn new() -> Self { - Self { head: None } - } - - pub fn peek(&self) -> Option<&T> { - self.head.as_deref().map(|n| &n.val) - } - - pub fn pop(&self) -> Option> { - self.head.as_deref().map(|n| Self { - head: n.next.clone(), - }) - } - - pub fn push(&self, v: T) -> Stack { - Self { - head: Some(Rc::new(Node { - val: v, - next: self.head.clone(), - })), - } - } - - // // O(n) - // pub fn size(&self) -> usize { - // let mut res = 0; - // let mut node = &self.head; - // while let Some(next) = node { - // res += 1; - // node = &next.next; - // } - // res - // } - - pub fn is_empty(&self) -> bool { - self.head.is_none() - } -} From ec528308fbd244cca9ae7b07802d2b3e8e48a117 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 07:48:36 +0800 Subject: [PATCH 23/58] Add comments --- src/diff/graph.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 17e65670c5..bc5550d7cf 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -92,7 +92,8 @@ enum EnteredDelimiter { /// side independently. PopEither = 0, /// If we've entered the LHS and RHS together, we must pop both - /// sides together too. Otherwise we'd consider the following case to have no changes. + /// sides together too. Otherwise we'd consider the following + /// case to have no changes. /// /// ```text /// Old: (a b c) @@ -103,6 +104,9 @@ enum EnteredDelimiter { use EnteredDelimiter::*; +/// LSP is the stack top. One bit represents one `EnteredDelimiter`. +/// +/// Length is not included since we want to avoid paddings. #[derive(Debug, Clone, Copy, PartialEq, Eq)] struct BitStack(u64); From ee725a190e00db43bd446643d5d56f3c95bd2ef1 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 08:04:05 +0800 Subject: [PATCH 24/58] Add asserts --- src/diff/graph.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bc5550d7cf..9dda1778f2 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -542,6 +542,10 @@ pub fn get_neighbours<'syn, 'b>( // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { // TODO: be consistent between parents_next and next_parents. + assert!( + v.lhs_parent_num < 64 && v.rhs_parent_num < 64, + "Exceeds max stack depth" + ); let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -621,6 +625,7 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { + assert!(v.lhs_parent_num < 64, "Exceeds max stack depth"); add_neighbor( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), @@ -661,6 +666,7 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { + assert!(v.rhs_parent_num < 64, "Exceeds max stack depth"); add_neighbor( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), From 8a0c82ad623121d63cafeae7eedb3c0471861e7a Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 08:04:50 +0800 Subject: [PATCH 25/58] Reuse neighbors vector --- src/diff/dijkstra.rs | 7 ++++++- src/diff/graph.rs | 7 ++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index a898403a7c..f3634b20e0 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -35,6 +35,8 @@ fn shortest_path<'a, 'b>( let mut seen = SeenMap::default(); seen.reserve(size_hint); + let mut neighbors = Vec::with_capacity(4); + let end: &'b Vertex<'a, 'b> = loop { match heap.pop() { Some((Reverse(distance), current)) => { @@ -47,7 +49,10 @@ fn shortest_path<'a, 'b>( break current; } - for (edge, next) in get_neighbours(current, vertex_arena, &mut seen) { + neighbors.clear(); + get_neighbours(current, vertex_arena, &mut seen, &mut neighbors); + + for &(edge, next) in neighbors.iter() { let distance_to_next = distance + edge.cost(); if distance_to_next < next.shortest_distance.get() { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 9dda1778f2..465c30f604 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -429,9 +429,8 @@ pub fn get_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, -) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { - let mut neighbors = Vec::with_capacity(4); - + neighbors: &mut Vec<(Edge, &'b Vertex<'syn, 'b>)>, +) { let mut add_neighbor = std::convert::identity( #[inline(always)] |edge, vertex| { @@ -684,8 +683,6 @@ pub fn get_neighbours<'syn, 'b>( } } } - - neighbors } pub fn populate_change_map<'a, 'b>( From 0e9a4c299d605e2436eabb713f679a12333f2209 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 08:55:02 +0800 Subject: [PATCH 26/58] Add root to SyntaxInfo to optimize parent comparison --- src/diff/graph.rs | 35 ++++++++++++++++++----------------- src/parse/syntax.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 465c30f604..f5a829b32b 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -53,6 +53,16 @@ impl<'a> SideSyntax<'a> { } } + pub fn root(&self) -> Option<&'a Syntax<'a>> { + match self.get_side() { + Some(side) => Some(side.root()), + None => match self.data ^ 1 { + 0 => None, + d => Some(unsafe { &*(d as *const Syntax<'a>) }.root()), + }, + } + } + pub fn from_side(side: &'a Syntax<'a>) -> Self { Self { data: side as *const _ as _, @@ -191,24 +201,15 @@ impl<'a, 'b> Vertex<'a, 'b> { } } - fn parents_eq(&self, other: &Vertex) -> bool { - fn parents_recur_eq(mut a: Option<&Syntax>, mut b: Option<&Syntax>) -> bool { - while let (Some(pa), Some(pb)) = (a, b) { - if pa.content_id() != pb.content_id() { - return false; - } - a = pa.parent(); - b = pb.parent(); - } - true - } - - self.lhs_parent_num == other.lhs_parent_num - && self.rhs_parent_num == other.rhs_parent_num - && self.lhs_parent_stack == other.lhs_parent_stack + fn parents_eq(&self, other: &Vertex<'a, 'b>) -> bool { + self.lhs_parent_stack == other.lhs_parent_stack && self.rhs_parent_stack == other.rhs_parent_stack - && parents_recur_eq(self.lhs_syntax.parent(), other.lhs_syntax.parent()) - && parents_recur_eq(self.rhs_syntax.parent(), other.rhs_syntax.parent()) + // roots are equal + // => roots' content IDs are equal, + // => children's content IDs are equal + // => their parents are equal + && self.lhs_syntax.root() == other.lhs_syntax.root() + && self.rhs_syntax.root() == other.rhs_syntax.root() } fn try_pop_tag(&self) -> (Option, Option) { diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index b1829fbeb8..b16bf60730 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -47,6 +47,8 @@ pub struct SyntaxInfo<'a> { prev: Cell>>, /// The parent syntax node, if present. parent: Cell>>, + /// The root syntax node. + root: Cell>>, /// Does the previous syntax node occur on the same line as the /// first line of this node? prev_is_contiguous: Cell, @@ -73,6 +75,7 @@ impl<'a> SyntaxInfo<'a> { next_sibling: Cell::new(None), prev: Cell::new(None), parent: Cell::new(None), + root: Cell::new(None), prev_is_contiguous: Cell::new(false), num_ancestors: Cell::new(0), num_after: Cell::new(0), @@ -268,6 +271,10 @@ impl<'a> Syntax<'a> { self.info().parent.get() } + pub fn root(&self) -> &'a Syntax<'a> { + self.info().root.get().unwrap() + } + pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> { self.info().next_sibling.get() } @@ -435,6 +442,7 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { /// side (LHS or RHS). fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) { set_parent(roots, None); + set_root(roots); set_num_ancestors(roots, 0); set_num_after(roots, 0); set_unique_id(roots, next_id); @@ -528,6 +536,24 @@ fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { } } +fn set_root<'a>(nodes: &[&'a Syntax<'a>]) { + fn inner<'a>(nodes: &[&'a Syntax<'a>], root: &'a Syntax<'a>) { + for node in nodes { + node.info().root.set(Some(root)); + if let List { children, .. } = node { + inner(children, root); + } + } + } + + for node in nodes { + node.info().root.set(Some(node)); + if let List { children, .. } = node { + inner(children, node); + } + } +} + fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) { for node in nodes { node.info().num_ancestors.set(num_ancestors); From 63eadc19b5c586541c619444ce81c3cf47c1233a Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 09:17:43 +0800 Subject: [PATCH 27/58] Optimize can_pop_either --- src/diff/graph.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f5a829b32b..3b3a7b2001 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -100,7 +100,7 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> enum EnteredDelimiter { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. - PopEither = 0, + PopEither = 1, /// If we've entered the LHS and RHS together, we must pop both /// sides together too. Otherwise we'd consider the following /// case to have no changes. @@ -109,7 +109,7 @@ enum EnteredDelimiter { /// Old: (a b c) /// New: (a b) c /// ``` - PopBoth = 1, + PopBoth = 0, } use EnteredDelimiter::*; @@ -228,10 +228,13 @@ impl<'a, 'b> Vertex<'a, 'b> { } fn can_pop_either(&self) -> bool { - matches!( - self.try_pop_tag(), - (Some(PopEither), _) | (_, Some(PopEither)) - ) + // matches!( + // self.try_pop_tag(), + // (Some(PopEither), _) | (_, Some(PopEither)) + // ) + + // Since PopEither = 1, when `peek` returns 1 it must be non-empty. + self.lhs_parent_stack.peek() == PopEither || self.rhs_parent_stack.peek() == PopEither } } From 82ce472548662e53e7f134605b3045e655f23bc8 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 09:28:41 +0800 Subject: [PATCH 28/58] Update assert text --- src/diff/graph.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 3b3a7b2001..c111677313 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -547,7 +547,7 @@ pub fn get_neighbours<'syn, 'b>( // TODO: be consistent between parents_next and next_parents. assert!( v.lhs_parent_num < 64 && v.rhs_parent_num < 64, - "Exceeds max stack depth" + "Exceed max stack depth" ); let depth_difference = (lhs_syntax.num_ancestors() as i32 @@ -628,7 +628,7 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - assert!(v.lhs_parent_num < 64, "Exceeds max stack depth"); + assert!(v.lhs_parent_num < 64, "Exceed max stack depth"); add_neighbor( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), @@ -669,7 +669,7 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - assert!(v.rhs_parent_num < 64, "Exceeds max stack depth"); + assert!(v.rhs_parent_num < 64, "Exceed max stack depth"); add_neighbor( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), From 8625f620af2c09390f2bde43bb9e8f7508afc8e6 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 09:42:37 +0800 Subject: [PATCH 29/58] Reserve change_map space --- src/diff/changes.rs | 4 ++++ src/diff/graph.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/diff/changes.rs b/src/diff/changes.rs index 5844b9c77f..bc51b1c5d8 100644 --- a/src/diff/changes.rs +++ b/src/diff/changes.rs @@ -26,6 +26,10 @@ impl<'a> ChangeMap<'a> { pub fn get(&self, node: &Syntax<'a>) -> Option> { self.changes.get(&node.id()).copied() } + + pub fn reserve(&mut self, additional: usize) { + self.changes.reserve(additional); + } } pub fn insert_deep_unchanged<'a>( diff --git a/src/diff/graph.rs b/src/diff/graph.rs index c111677313..7c3459167e 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -693,6 +693,8 @@ pub fn populate_change_map<'a, 'b>( route: &[(Edge, &'b Vertex<'a, 'b>)], change_map: &mut ChangeMap<'a>, ) { + change_map.reserve(route.len()); + for (e, v) in route { match e { ExitDelimiterBoth | ExitDelimiterLHS | ExitDelimiterRHS => { From d921c75c94aa56e2cf81ace8137eb70daecf2a1d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 09:52:25 +0800 Subject: [PATCH 30/58] Combine insert_deep_unchanged calls --- src/diff/changes.rs | 15 ++++++++------- src/diff/graph.rs | 1 - src/diff/sliders.rs | 2 -- src/diff/unchanged.rs | 3 --- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/diff/changes.rs b/src/diff/changes.rs index bc51b1c5d8..493b7ff421 100644 --- a/src/diff/changes.rs +++ b/src/diff/changes.rs @@ -33,24 +33,25 @@ impl<'a> ChangeMap<'a> { } pub fn insert_deep_unchanged<'a>( - node: &'a Syntax<'a>, - opposite_node: &'a Syntax<'a>, + lhs: &'a Syntax<'a>, + rhs: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>, ) { - change_map.insert(node, ChangeKind::Unchanged(opposite_node)); + change_map.insert(lhs, ChangeKind::Unchanged(rhs)); + change_map.insert(rhs, ChangeKind::Unchanged(lhs)); - match (node, opposite_node) { + match (lhs, rhs) { ( Syntax::List { - children: node_children, + children: lhs_children, .. }, Syntax::List { - children: opposite_children, + children: rhs_children, .. }, ) => { - for (child, opposite_child) in node_children.iter().zip(opposite_children) { + for (child, opposite_child) in lhs_children.iter().zip(rhs_children) { insert_deep_unchanged(child, opposite_child, change_map); } } diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7c3459167e..5fc6670caf 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -706,7 +706,6 @@ pub fn populate_change_map<'a, 'b>( let rhs = v.rhs_syntax.get_side().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); - insert_deep_unchanged(rhs, lhs, change_map); } EnterUnchangedDelimiter { .. } => { // No change on the outer delimiter, but children may diff --git a/src/diff/sliders.rs b/src/diff/sliders.rs index dd94cc3d64..ce3f98740b 100644 --- a/src/diff/sliders.rs +++ b/src/diff/sliders.rs @@ -486,7 +486,6 @@ fn slide_to_prev_node<'a>( insert_deep_novel(before_start_node, change_map); insert_deep_unchanged(last_node, opposite, change_map); - insert_deep_unchanged(opposite, last_node, change_map); } } @@ -535,7 +534,6 @@ fn slide_to_next_node<'a>( }; insert_deep_unchanged(start_node, opposite, change_map); - insert_deep_unchanged(opposite, start_node, change_map); insert_deep_novel(after_last_node, change_map); } } diff --git a/src/diff/unchanged.rs b/src/diff/unchanged.rs index 2b7de0d56e..88042172f8 100644 --- a/src/diff/unchanged.rs +++ b/src/diff/unchanged.rs @@ -71,7 +71,6 @@ fn split_unchanged<'a>( lhs_section_nodes.iter().zip(rhs_section_nodes.iter()) { insert_deep_unchanged(lhs_section_node, rhs_section_node, change_map); - insert_deep_unchanged(rhs_section_node, lhs_section_node, change_map); } } ChangeState::PossiblyChanged => { @@ -408,7 +407,6 @@ fn shrink_unchanged_at_ends<'a>( // trivial unchanged node in the middle. if lhs_node.content_id() == rhs_node.content_id() { insert_deep_unchanged(lhs_node, rhs_node, change_map); - insert_deep_unchanged(rhs_node, lhs_node, change_map); changed = true; lhs_nodes = &lhs_nodes[1..]; @@ -421,7 +419,6 @@ fn shrink_unchanged_at_ends<'a>( while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.last(), rhs_nodes.last()) { if lhs_node.content_id() == rhs_node.content_id() { insert_deep_unchanged(lhs_node, rhs_node, change_map); - insert_deep_unchanged(rhs_node, lhs_node, change_map); changed = true; lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1]; From 97e883dd5fb263d5d91bb8edbf0618347b66557e Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 2 Oct 2022 10:45:05 +0800 Subject: [PATCH 31/58] Reserve arena space --- src/diff/dijkstra.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index f3634b20e0..a48dd071cd 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -142,7 +142,7 @@ pub fn mark_syntax<'a>( let size_hint = lhs_node_count * rhs_node_count; let start = Vertex::new(lhs_syntax, rhs_syntax); - let vertex_arena = Bump::new(); + let vertex_arena = Bump::with_capacity(size_hint); let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?; From 7e4e747f43d92463599f2a4bb066df5de34eb4a6 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Mon, 3 Oct 2022 00:40:59 +0800 Subject: [PATCH 32/58] Refactor BitStack --- src/diff/graph.rs | 116 ++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 70 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 5fc6670caf..9fee9636fb 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -100,7 +100,7 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> enum EnteredDelimiter { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. - PopEither = 1, + PopEither = 0, /// If we've entered the LHS and RHS together, we must pop both /// sides together too. Otherwise we'd consider the following /// case to have no changes. @@ -109,31 +109,54 @@ enum EnteredDelimiter { /// Old: (a b c) /// New: (a b) c /// ``` - PopBoth = 0, + PopBoth = 1, } use EnteredDelimiter::*; /// LSP is the stack top. One bit represents one `EnteredDelimiter`. /// -/// Length is not included since we want to avoid paddings. +/// Assume the underlying type is u8, then +/// +/// ```text +/// initial: 0b00000001 +/// push x: 0b0000001x +/// push y: 0b000001xy +/// pop: 0b0000001x +/// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq)] struct BitStack(u64); impl BitStack { - fn peek(&self) -> EnteredDelimiter { - if self.0 & 1 == PopEither as u64 { - PopEither + fn new() -> Self { + Self(1) + } + + fn is_empty(&self) -> bool { + self.0 == 1 + } + + fn is_full(&self) -> bool { + self.0 & (1 << 63) != 0 + } + + fn peek(&self) -> Option { + if self.is_empty() { + None + } else if self.0 & 1 == PopEither as u64 { + Some(PopEither) } else { - PopBoth + Some(PopBoth) } } fn push(&self, tag: EnteredDelimiter) -> Self { + assert!(!self.is_full(), "BitStack is full!"); Self((self.0 << 1) | tag as u64) } fn pop(&self) -> Self { + assert!(!self.is_empty(), "BitStack is empty!"); Self(self.0 >> 1) } } @@ -173,8 +196,6 @@ pub struct Vertex<'a, 'b> { pub predecessor: Cell)>>, pub lhs_syntax: SideSyntax<'a>, pub rhs_syntax: SideSyntax<'a>, - lhs_parent_num: u8, - rhs_parent_num: u8, lhs_parent_stack: BitStack, rhs_parent_stack: BitStack, } @@ -183,8 +204,8 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { !self.lhs_syntax.is_side() && !self.rhs_syntax.is_side() - && self.lhs_parent_num == 0 - && self.rhs_parent_num == 0 + && self.lhs_parent_stack.is_empty() + && self.rhs_parent_stack.is_empty() } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { @@ -194,10 +215,8 @@ impl<'a, 'b> Vertex<'a, 'b> { predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), - lhs_parent_num: 0, - rhs_parent_num: 0, - lhs_parent_stack: BitStack(0), - rhs_parent_stack: BitStack(0), + lhs_parent_stack: BitStack::new(), + rhs_parent_stack: BitStack::new(), } } @@ -212,29 +231,12 @@ impl<'a, 'b> Vertex<'a, 'b> { && self.rhs_syntax.root() == other.rhs_syntax.root() } - fn try_pop_tag(&self) -> (Option, Option) { - ( - if self.lhs_parent_num > 0 { - Some(self.lhs_parent_stack.peek()) - } else { - None - }, - if self.rhs_parent_num > 0 { - Some(self.rhs_parent_stack.peek()) - } else { - None - }, - ) - } - fn can_pop_either(&self) -> bool { - // matches!( - // self.try_pop_tag(), - // (Some(PopEither), _) | (_, Some(PopEither)) - // ) + // self.lhs_parent_stack.peek() == Some(PopEither) + // || self.rhs_parent_stack.peek() == Some(PopEither) - // Since PopEither = 1, when `peek` returns 1 it must be non-empty. - self.lhs_parent_stack.peek() == PopEither || self.rhs_parent_stack.peek() == PopEither + // Utilize the fact that PopEither = 0 while stack top indicator is 1. + self.lhs_parent_stack.0 & 1 == 0 || self.rhs_parent_stack.0 & 1 == 0 } } @@ -448,10 +450,11 @@ pub fn get_neighbours<'syn, 'b>( let v_info = ( v.lhs_syntax.get_side(), v.rhs_syntax.get_side(), - v.try_pop_tag(), + v.lhs_parent_stack.peek(), + v.rhs_parent_stack.peek(), ); - if let (None, None, (Some(PopBoth), Some(PopBoth))) = v_info { + if let (None, None, Some(PopBoth), Some(PopBoth)) = v_info { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -461,8 +464,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), - lhs_parent_num: v.lhs_parent_num - 1, - rhs_parent_num: v.rhs_parent_num - 1, lhs_parent_stack: v.lhs_parent_stack.pop(), rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() @@ -470,7 +471,7 @@ pub fn get_neighbours<'syn, 'b>( ); } - if let (None, _, (Some(PopEither), _)) = v_info { + if let (None, _, Some(PopEither), _) = v_info { // Move to next after LHS parent. // Continue from sibling of parent. @@ -479,8 +480,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), rhs_syntax: v.rhs_syntax, - lhs_parent_num: v.lhs_parent_num - 1, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack.pop(), rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -488,7 +487,7 @@ pub fn get_neighbours<'syn, 'b>( ); } - if let (_, None, (_, Some(PopEither))) = v_info { + if let (_, None, _, Some(PopEither)) = v_info { // Move to next after RHS parent. // Continue from sibling of parent. @@ -497,8 +496,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num - 1, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() @@ -506,7 +503,7 @@ pub fn get_neighbours<'syn, 'b>( ); } - if let (Some(lhs_syntax), Some(rhs_syntax), _) = v_info { + if let (Some(lhs_syntax), Some(rhs_syntax), _, _) = v_info { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -518,8 +515,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -545,11 +540,6 @@ pub fn get_neighbours<'syn, 'b>( // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { // TODO: be consistent between parents_next and next_parents. - assert!( - v.lhs_parent_num < 64 && v.rhs_parent_num < 64, - "Exceed max stack depth" - ); - let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); @@ -559,8 +549,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - lhs_parent_num: v.lhs_parent_num + 1, - rhs_parent_num: v.rhs_parent_num + 1, lhs_parent_stack: v.lhs_parent_stack.push(PopBoth), rhs_parent_stack: v.rhs_parent_stack.push(PopBoth), ..Vertex::default() @@ -593,8 +581,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -604,7 +590,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let (Some(lhs_syntax), _, _) = v_info { + if let (Some(lhs_syntax), _, _, _) = v_info { match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -618,8 +604,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -628,7 +612,6 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - assert!(v.lhs_parent_num < 64, "Exceed max stack depth"); add_neighbor( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), @@ -636,8 +619,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, - lhs_parent_num: v.lhs_parent_num + 1, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack.push(PopEither), rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -647,7 +628,7 @@ pub fn get_neighbours<'syn, 'b>( } } - if let (_, Some(rhs_syntax), _) = v_info { + if let (_, Some(rhs_syntax), _, _) = v_info { match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -659,8 +640,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -669,7 +648,6 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - assert!(v.rhs_parent_num < 64, "Exceed max stack depth"); add_neighbor( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), @@ -677,8 +655,6 @@ pub fn get_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_child_syntax(rhs_syntax, children), - lhs_parent_num: v.lhs_parent_num, - rhs_parent_num: v.rhs_parent_num + 1, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack.push(PopEither), ..Vertex::default() From ec56c99ad8ebc35ba1a5ba09c02fe5bb1c3cfdc4 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Mon, 3 Oct 2022 03:16:08 +0800 Subject: [PATCH 33/58] Small tweaks --- src/diff/dijkstra.rs | 35 ++++++++--------------------------- src/diff/graph.rs | 44 +++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 52 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index a48dd071cd..5ae388f8ad 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -1,7 +1,7 @@ //! Implements Dijkstra's algorithm for shortest path, to find an //! optimal and readable diff between two ASTs. -use std::{cmp::Reverse, env}; +use std::{cmp::Reverse, env, iter::successors}; use crate::{ diff::changes::ChangeMap, @@ -77,46 +77,27 @@ fn shortest_path<'a, 'b>( heap.len(), ); - let mut current = end.predecessor.get(); - let mut vertex_route = vec![]; - while let Some((edge, node)) = current { - vertex_route.push((edge, node)); - current = node.predecessor.get(); - } - + let mut vertex_route = + successors(end.predecessor.get(), |&(_, node)| node.predecessor.get()).collect::>(); vertex_route.reverse(); Ok(vertex_route) } /// What is the total number of AST nodes? fn node_count(root: Option<&Syntax>) -> u32 { - let mut node = root; - let mut count = 0; - while let Some(current_node) = node { - let current_count = match current_node { + successors(root, |node| node.next_sibling()) + .map(|node| match node { Syntax::List { num_descendants, .. } => *num_descendants, Syntax::Atom { .. } => 1, - }; - count += current_count; - - node = current_node.next_sibling(); - } - - count + }) + .sum::() } /// How many top-level AST nodes do we have? fn tree_count(root: Option<&Syntax>) -> u32 { - let mut node = root; - let mut count = 0; - while let Some(current_node) = node { - count += 1; - node = current_node.next_sibling(); - } - - count + successors(root, |node| node.next_sibling()).count() as u32 } pub fn mark_syntax<'a>( diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 9fee9636fb..b6ddc3363b 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -82,17 +82,15 @@ impl<'a> SideSyntax<'a> { } } -fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { +fn next_sibling<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { let parent = SideSyntax::from_parent(syntax.parent()); syntax.next_sibling().map_or(parent, SideSyntax::from_side) } -fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { +fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { let parent = SideSyntax::from_parent(Some(syntax)); - children - .get(0) - .copied() - .map_or(parent, SideSyntax::from_side) + let child = children.get(0).copied(); + child.map_or(parent, SideSyntax::from_side) } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -119,7 +117,7 @@ use EnteredDelimiter::*; /// Assume the underlying type is u8, then /// /// ```text -/// initial: 0b00000001 +/// new: 0b00000001 /// push x: 0b0000001x /// push y: 0b000001xy /// pop: 0b0000001x @@ -223,10 +221,6 @@ impl<'a, 'b> Vertex<'a, 'b> { fn parents_eq(&self, other: &Vertex<'a, 'b>) -> bool { self.lhs_parent_stack == other.lhs_parent_stack && self.rhs_parent_stack == other.rhs_parent_stack - // roots are equal - // => roots' content IDs are equal, - // => children's content IDs are equal - // => their parents are equal && self.lhs_syntax.root() == other.lhs_syntax.root() && self.rhs_syntax.root() == other.rhs_syntax.root() } @@ -462,8 +456,8 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( ExitDelimiterBoth, Vertex { - lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), - rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), + lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), + rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), lhs_parent_stack: v.lhs_parent_stack.pop(), rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() @@ -478,7 +472,7 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( ExitDelimiterLHS, Vertex { - lhs_syntax: next_sibling_syntax(v.lhs_syntax.parent().unwrap()), + lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), rhs_syntax: v.rhs_syntax, lhs_parent_stack: v.lhs_parent_stack.pop(), rhs_parent_stack: v.rhs_parent_stack, @@ -495,7 +489,7 @@ pub fn get_neighbours<'syn, 'b>( ExitDelimiterRHS, Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(v.rhs_syntax.parent().unwrap()), + rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack.pop(), ..Vertex::default() @@ -513,8 +507,8 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( UnchangedNode { depth_difference }, Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_syntax: next_sibling(lhs_syntax), + rhs_syntax: next_sibling(rhs_syntax), lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -547,8 +541,8 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( EnterUnchangedDelimiter { depth_difference }, Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), - rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), + lhs_syntax: next_child(lhs_syntax, lhs_children), + rhs_syntax: next_child(rhs_syntax, rhs_children), lhs_parent_stack: v.lhs_parent_stack.push(PopBoth), rhs_parent_stack: v.rhs_parent_stack.push(PopBoth), ..Vertex::default() @@ -579,8 +573,8 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( ReplacedComment { levenshtein_pct }, Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_syntax: next_sibling(lhs_syntax), + rhs_syntax: next_sibling(rhs_syntax), lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -602,7 +596,7 @@ pub fn get_neighbours<'syn, 'b>( probably_punctuation: looks_like_punctuation(content), }, Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), + lhs_syntax: next_sibling(lhs_syntax), rhs_syntax: v.rhs_syntax, lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, @@ -617,7 +611,7 @@ pub fn get_neighbours<'syn, 'b>( contiguous: lhs_syntax.prev_is_contiguous(), }, Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, children), + lhs_syntax: next_child(lhs_syntax, children), rhs_syntax: v.rhs_syntax, lhs_parent_stack: v.lhs_parent_stack.push(PopEither), rhs_parent_stack: v.rhs_parent_stack, @@ -639,7 +633,7 @@ pub fn get_neighbours<'syn, 'b>( }, Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_syntax), + rhs_syntax: next_sibling(rhs_syntax), lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() @@ -654,7 +648,7 @@ pub fn get_neighbours<'syn, 'b>( }, Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_child_syntax(rhs_syntax, children), + rhs_syntax: next_child(rhs_syntax, children), lhs_parent_stack: v.lhs_parent_stack, rhs_parent_stack: v.rhs_parent_stack.push(PopEither), ..Vertex::default() From 005a5a6f31c916554bbb242184452d7ffd77f5dc Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Wed, 5 Oct 2022 23:56:48 +0800 Subject: [PATCH 34/58] Fix a bug --- src/parse/syntax.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index b16bf60730..d1aa3cbacc 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -432,6 +432,7 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) { } } pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { + set_root(roots); set_prev_sibling(roots); set_next_sibling(roots); set_prev(roots, None); @@ -442,7 +443,6 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { /// side (LHS or RHS). fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) { set_parent(roots, None); - set_root(roots); set_num_ancestors(roots, 0); set_num_after(roots, 0); set_unique_id(roots, next_id); From 10ce859bf4c465e3d32fa909c4f6150282910c05 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 03:10:24 +0800 Subject: [PATCH 35/58] Reduce edge kinds --- src/diff/dijkstra.rs | 12 ++++++------ src/diff/graph.rs | 19 ++++++------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 5ae388f8ad..02fbd7e641 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -253,7 +253,7 @@ mod tests { contiguous: false, probably_punctuation: false, }, - ExitDelimiterBoth, + ExitDelimiter, ] ); } @@ -303,7 +303,7 @@ mod tests { contiguous: false, probably_punctuation: false }, - ExitDelimiterBoth, + ExitDelimiter, ] ); } @@ -353,8 +353,8 @@ mod tests { UnchangedNode { depth_difference: 0 }, - ExitDelimiterRHS, - ExitDelimiterLHS, + ExitDelimiter, + ExitDelimiter, ], ); } @@ -434,7 +434,7 @@ mod tests { contiguous: true, probably_punctuation: false }, - ExitDelimiterLHS, + ExitDelimiter, ] ); } @@ -476,7 +476,7 @@ mod tests { contiguous: true, probably_punctuation: false }, - ExitDelimiterLHS, + ExitDelimiter, NovelAtomLHS { contiguous: true, probably_punctuation: true diff --git a/src/diff/graph.rs b/src/diff/graph.rs index b6ddc3363b..d5003b0304 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -315,9 +315,7 @@ pub enum Edge { EnterNovelDelimiterRHS { contiguous: bool, }, - ExitDelimiterLHS, - ExitDelimiterRHS, - ExitDelimiterBoth, + ExitDelimiter, } const NOT_CONTIGUOUS_PENALTY: u64 = 50; @@ -329,12 +327,7 @@ impl Edge { // delimiter possibility, so the cost doesn't matter. We // choose a non-zero number as it's easier to reason // about. - ExitDelimiterBoth => 1, - // Choose a higher value for exiting individually. This - // shouldn't matter since entering a novel delimiter is - // already more expensive than entering a matched - // delimiter, but be defensive. - ExitDelimiterLHS | ExitDelimiterRHS => 2, + ExitDelimiter => 1, // Matching nodes is always best. UnchangedNode { depth_difference } => min(40, u64::from(depth_difference) + 1), @@ -454,7 +447,7 @@ pub fn get_neighbours<'syn, 'b>( // Continue from sibling of parent. add_neighbor( - ExitDelimiterBoth, + ExitDelimiter, Vertex { lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), @@ -470,7 +463,7 @@ pub fn get_neighbours<'syn, 'b>( // Continue from sibling of parent. add_neighbor( - ExitDelimiterLHS, + ExitDelimiter, Vertex { lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), rhs_syntax: v.rhs_syntax, @@ -486,7 +479,7 @@ pub fn get_neighbours<'syn, 'b>( // Continue from sibling of parent. add_neighbor( - ExitDelimiterRHS, + ExitDelimiter, Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), @@ -667,7 +660,7 @@ pub fn populate_change_map<'a, 'b>( for (e, v) in route { match e { - ExitDelimiterBoth | ExitDelimiterLHS | ExitDelimiterRHS => { + ExitDelimiter => { // Nothing to do: we have already marked this node when we entered it. } UnchangedNode { .. } => { From efc70f55e8b352c7a27ddf277fb2f58567280806 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 07:42:08 +0800 Subject: [PATCH 36/58] Combine some ExitDelimiterLHS/RHS vertices --- src/diff/graph.rs | 120 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index d5003b0304..6deca23f26 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -581,6 +581,14 @@ pub fn get_neighbours<'syn, 'b>( match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { + let mut next_syntax = lhs_syntax; + let mut next_stack = v.lhs_parent_stack; + while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) + { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); + } + add_neighbor( NovelAtomLHS { // TODO: should this apply if prev is a parent @@ -589,9 +597,9 @@ pub fn get_neighbours<'syn, 'b>( probably_punctuation: looks_like_punctuation(content), }, Vertex { - lhs_syntax: next_sibling(lhs_syntax), + lhs_syntax: next_sibling(next_syntax), rhs_syntax: v.rhs_syntax, - lhs_parent_stack: v.lhs_parent_stack, + lhs_parent_stack: next_stack, rhs_parent_stack: v.rhs_parent_stack, ..Vertex::default() }, @@ -599,18 +607,42 @@ pub fn get_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - add_neighbor( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: next_child(lhs_syntax, children), - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: v.lhs_parent_stack.push(PopEither), - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); + if let Some(next_syntax) = children.get(0).copied() { + add_neighbor( + EnterNovelDelimiterLHS { + contiguous: lhs_syntax.prev_is_contiguous(), + }, + Vertex { + lhs_syntax: SideSyntax::from_side(next_syntax), + rhs_syntax: v.rhs_syntax, + lhs_parent_stack: v.lhs_parent_stack.push(PopEither), + rhs_parent_stack: v.rhs_parent_stack, + ..Vertex::default() + }, + ); + } else { + let mut next_syntax = lhs_syntax; + let mut next_stack = v.lhs_parent_stack; + while let (None, Some(PopEither)) = + (next_syntax.next_sibling(), next_stack.peek()) + { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); + } + + add_neighbor( + EnterNovelDelimiterLHS { + contiguous: lhs_syntax.prev_is_contiguous(), + }, + Vertex { + lhs_syntax: next_sibling(next_syntax), + rhs_syntax: v.rhs_syntax, + lhs_parent_stack: next_stack, + rhs_parent_stack: v.rhs_parent_stack, + ..Vertex::default() + }, + ); + } } } } @@ -619,6 +651,14 @@ pub fn get_neighbours<'syn, 'b>( match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { + let mut next_syntax = rhs_syntax; + let mut next_stack = v.rhs_parent_stack; + while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) + { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); + } + add_neighbor( NovelAtomRHS { contiguous: rhs_syntax.prev_is_contiguous(), @@ -626,27 +666,51 @@ pub fn get_neighbours<'syn, 'b>( }, Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling(rhs_syntax), + rhs_syntax: next_sibling(next_syntax), lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack, + rhs_parent_stack: next_stack, ..Vertex::default() }, ); } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - add_neighbor( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_child(rhs_syntax, children), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack.push(PopEither), - ..Vertex::default() - }, - ); + if let Some(next_syntax) = children.get(0).copied() { + add_neighbor( + EnterNovelDelimiterRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + }, + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: SideSyntax::from_side(next_syntax), + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: v.rhs_parent_stack.push(PopEither), + ..Vertex::default() + }, + ); + } else { + let mut next_syntax = rhs_syntax; + let mut next_stack = v.rhs_parent_stack; + while let (None, Some(PopEither)) = + (next_syntax.next_sibling(), next_stack.peek()) + { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); + } + + add_neighbor( + EnterNovelDelimiterRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + }, + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling(next_syntax), + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: next_stack, + ..Vertex::default() + }, + ); + } } } } From e2bf31f609ac7fb75cee73e954f16560f07a603f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 07:54:55 +0800 Subject: [PATCH 37/58] Simplify get_neighbours --- src/diff/graph.rs | 208 ++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 128 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 6deca23f26..6ec78780b6 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -578,141 +578,93 @@ pub fn get_neighbours<'syn, 'b>( } if let (Some(lhs_syntax), _, _, _) = v_info { - match lhs_syntax { - // Step over this novel atom. - Syntax::Atom { content, .. } => { - let mut next_syntax = lhs_syntax; - let mut next_stack = v.lhs_parent_stack; - while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) - { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } + let (edge, child) = match lhs_syntax { + Syntax::Atom { content, .. } => ( + NovelAtomLHS { + // TODO: should this apply if prev is a parent + // node rather than a sibling? + contiguous: lhs_syntax.prev_is_contiguous(), + probably_punctuation: looks_like_punctuation(content), + }, + None, + ), + Syntax::List { children, .. } => ( + EnterNovelDelimiterLHS { + contiguous: lhs_syntax.prev_is_contiguous(), + }, + children.get(0).copied(), + ), + }; - add_neighbor( - NovelAtomLHS { - // TODO: should this apply if prev is a parent - // node rather than a sibling? - contiguous: lhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - Vertex { - lhs_syntax: next_sibling(next_syntax), - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: next_stack, - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); - } - // Step into this partially/fully novel list. - Syntax::List { children, .. } => { - if let Some(next_syntax) = children.get(0).copied() { - add_neighbor( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: SideSyntax::from_side(next_syntax), - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: v.lhs_parent_stack.push(PopEither), - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); - } else { - let mut next_syntax = lhs_syntax; - let mut next_stack = v.lhs_parent_stack; - while let (None, Some(PopEither)) = - (next_syntax.next_sibling(), next_stack.peek()) - { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } - - add_neighbor( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: next_sibling(next_syntax), - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: next_stack, - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); - } + let (next_syntax, next_stack) = if let Some(child) = child { + ( + SideSyntax::from_side(child), + v.lhs_parent_stack.push(PopEither), + ) + } else { + let mut next_syntax = lhs_syntax; + let mut next_stack = v.lhs_parent_stack; + while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); } - } + (next_sibling(next_syntax), next_stack) + }; + + add_neighbor( + edge, + Vertex { + lhs_syntax: next_syntax, + rhs_syntax: v.rhs_syntax, + lhs_parent_stack: next_stack, + rhs_parent_stack: v.rhs_parent_stack, + ..Vertex::default() + }, + ); } if let (_, Some(rhs_syntax), _, _) = v_info { - match rhs_syntax { - // Step over this novel atom. - Syntax::Atom { content, .. } => { - let mut next_syntax = rhs_syntax; - let mut next_stack = v.rhs_parent_stack; - while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) - { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } + let (edge, child) = match rhs_syntax { + Syntax::Atom { content, .. } => ( + NovelAtomRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + probably_punctuation: looks_like_punctuation(content), + }, + None, + ), + Syntax::List { children, .. } => ( + EnterNovelDelimiterRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + }, + children.get(0).copied(), + ), + }; - add_neighbor( - NovelAtomRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling(next_syntax), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: next_stack, - ..Vertex::default() - }, - ); - } - // Step into this partially/fully novel list. - Syntax::List { children, .. } => { - if let Some(next_syntax) = children.get(0).copied() { - add_neighbor( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: SideSyntax::from_side(next_syntax), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack.push(PopEither), - ..Vertex::default() - }, - ); - } else { - let mut next_syntax = rhs_syntax; - let mut next_stack = v.rhs_parent_stack; - while let (None, Some(PopEither)) = - (next_syntax.next_sibling(), next_stack.peek()) - { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } - - add_neighbor( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling(next_syntax), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: next_stack, - ..Vertex::default() - }, - ); - } + let (next_syntax, next_stack) = if let Some(child) = child { + ( + SideSyntax::from_side(child), + v.rhs_parent_stack.push(PopEither), + ) + } else { + let mut next_syntax = rhs_syntax; + let mut next_stack = v.rhs_parent_stack; + while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) { + next_syntax = next_syntax.parent().unwrap(); + next_stack = next_stack.pop(); } - } + (next_sibling(next_syntax), next_stack) + }; + + add_neighbor( + edge, + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_syntax, + lhs_parent_stack: v.lhs_parent_stack, + rhs_parent_stack: next_stack, + ..Vertex::default() + }, + ); } } From 4f01c1e415c2fddbe434751105bc5caa816f2e8f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 08:07:56 +0800 Subject: [PATCH 38/58] Combine all ExitDelimiterLHS/RHS vertices --- src/diff/graph.rs | 84 +++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 6ec78780b6..289344de76 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -458,52 +458,38 @@ pub fn get_neighbours<'syn, 'b>( ); } - if let (None, _, Some(PopEither), _) = v_info { - // Move to next after LHS parent. - - // Continue from sibling of parent. - add_neighbor( - ExitDelimiter, - Vertex { - lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: v.lhs_parent_stack.pop(), - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); - } - - if let (_, None, _, Some(PopEither)) = v_info { - // Move to next after RHS parent. - - // Continue from sibling of parent. - add_neighbor( - ExitDelimiter, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack.pop(), - ..Vertex::default() - }, - ); - } - if let (Some(lhs_syntax), Some(rhs_syntax), _, _) = v_info { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); + let mut next_lhs_syntax = lhs_syntax; + let mut next_lhs_stack = v.lhs_parent_stack; + while let (None, Some(PopEither)) = + (next_lhs_syntax.next_sibling(), next_lhs_stack.peek()) + { + next_lhs_syntax = next_lhs_syntax.parent().unwrap(); + next_lhs_stack = next_lhs_stack.pop(); + } + + let mut next_rhs_syntax = rhs_syntax; + let mut next_rhs_stack = v.rhs_parent_stack; + while let (None, Some(PopEither)) = + (next_rhs_syntax.next_sibling(), next_rhs_stack.peek()) + { + next_rhs_syntax = next_rhs_syntax.parent().unwrap(); + next_rhs_stack = next_rhs_stack.pop(); + } + // Both nodes are equal, the happy case. add_neighbor( UnchangedNode { depth_difference }, Vertex { - lhs_syntax: next_sibling(lhs_syntax), - rhs_syntax: next_sibling(rhs_syntax), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack, + lhs_syntax: next_sibling(next_lhs_syntax), + rhs_syntax: next_sibling(next_rhs_syntax), + lhs_parent_stack: next_lhs_stack, + rhs_parent_stack: next_rhs_stack, ..Vertex::default() }, ); @@ -563,13 +549,31 @@ pub fn get_neighbours<'syn, 'b>( let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; + let mut next_lhs_syntax = lhs_syntax; + let mut next_lhs_stack = v.lhs_parent_stack; + while let (None, Some(PopEither)) = + (next_lhs_syntax.next_sibling(), next_lhs_stack.peek()) + { + next_lhs_syntax = next_lhs_syntax.parent().unwrap(); + next_lhs_stack = next_lhs_stack.pop(); + } + + let mut next_rhs_syntax = rhs_syntax; + let mut next_rhs_stack = v.rhs_parent_stack; + while let (None, Some(PopEither)) = + (next_rhs_syntax.next_sibling(), next_rhs_stack.peek()) + { + next_rhs_syntax = next_rhs_syntax.parent().unwrap(); + next_rhs_stack = next_rhs_stack.pop(); + } + add_neighbor( ReplacedComment { levenshtein_pct }, Vertex { - lhs_syntax: next_sibling(lhs_syntax), - rhs_syntax: next_sibling(rhs_syntax), - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: v.rhs_parent_stack, + lhs_syntax: next_sibling(next_lhs_syntax), + rhs_syntax: next_sibling(next_rhs_syntax), + lhs_parent_stack: next_lhs_stack, + rhs_parent_stack: next_rhs_stack, ..Vertex::default() }, ); From fef14541a010153504dc82f2dd7c4d5217d5f00b Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 08:16:25 +0800 Subject: [PATCH 39/58] Simplify get_neighbours --- src/diff/graph.rs | 75 +++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 289344de76..1c8f90cead 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -417,6 +417,17 @@ fn looks_like_punctuation(content: &str) -> bool { content == "," || content == ";" || content == "." } +fn skip_pop_either<'a>( + mut syntax: &'a Syntax<'a>, + mut stack: BitStack, +) -> (SideSyntax<'a>, BitStack) { + while let (None, Some(PopEither)) = (syntax.next_sibling(), stack.peek()) { + syntax = syntax.parent().unwrap(); + stack = stack.pop(); + } + (next_sibling(syntax), stack) +} + /// Compute the neighbours of `v`. pub fn get_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, @@ -464,30 +475,15 @@ pub fn get_neighbours<'syn, 'b>( - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - let mut next_lhs_syntax = lhs_syntax; - let mut next_lhs_stack = v.lhs_parent_stack; - while let (None, Some(PopEither)) = - (next_lhs_syntax.next_sibling(), next_lhs_stack.peek()) - { - next_lhs_syntax = next_lhs_syntax.parent().unwrap(); - next_lhs_stack = next_lhs_stack.pop(); - } - - let mut next_rhs_syntax = rhs_syntax; - let mut next_rhs_stack = v.rhs_parent_stack; - while let (None, Some(PopEither)) = - (next_rhs_syntax.next_sibling(), next_rhs_stack.peek()) - { - next_rhs_syntax = next_rhs_syntax.parent().unwrap(); - next_rhs_stack = next_rhs_stack.pop(); - } + let (next_lhs_syntax, next_lhs_stack) = skip_pop_either(lhs_syntax, v.lhs_parent_stack); + let (next_rhs_syntax, next_rhs_stack) = skip_pop_either(rhs_syntax, v.rhs_parent_stack); // Both nodes are equal, the happy case. add_neighbor( UnchangedNode { depth_difference }, Vertex { - lhs_syntax: next_sibling(next_lhs_syntax), - rhs_syntax: next_sibling(next_rhs_syntax), + lhs_syntax: next_lhs_syntax, + rhs_syntax: next_rhs_syntax, lhs_parent_stack: next_lhs_stack, rhs_parent_stack: next_rhs_stack, ..Vertex::default() @@ -549,29 +545,16 @@ pub fn get_neighbours<'syn, 'b>( let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - let mut next_lhs_syntax = lhs_syntax; - let mut next_lhs_stack = v.lhs_parent_stack; - while let (None, Some(PopEither)) = - (next_lhs_syntax.next_sibling(), next_lhs_stack.peek()) - { - next_lhs_syntax = next_lhs_syntax.parent().unwrap(); - next_lhs_stack = next_lhs_stack.pop(); - } - - let mut next_rhs_syntax = rhs_syntax; - let mut next_rhs_stack = v.rhs_parent_stack; - while let (None, Some(PopEither)) = - (next_rhs_syntax.next_sibling(), next_rhs_stack.peek()) - { - next_rhs_syntax = next_rhs_syntax.parent().unwrap(); - next_rhs_stack = next_rhs_stack.pop(); - } + let (next_lhs_syntax, next_lhs_stack) = + skip_pop_either(lhs_syntax, v.lhs_parent_stack); + let (next_rhs_syntax, next_rhs_stack) = + skip_pop_either(rhs_syntax, v.rhs_parent_stack); add_neighbor( ReplacedComment { levenshtein_pct }, Vertex { - lhs_syntax: next_sibling(next_lhs_syntax), - rhs_syntax: next_sibling(next_rhs_syntax), + lhs_syntax: next_lhs_syntax, + rhs_syntax: next_rhs_syntax, lhs_parent_stack: next_lhs_stack, rhs_parent_stack: next_rhs_stack, ..Vertex::default() @@ -606,13 +589,7 @@ pub fn get_neighbours<'syn, 'b>( v.lhs_parent_stack.push(PopEither), ) } else { - let mut next_syntax = lhs_syntax; - let mut next_stack = v.lhs_parent_stack; - while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } - (next_sibling(next_syntax), next_stack) + skip_pop_either(lhs_syntax, v.lhs_parent_stack) }; add_neighbor( @@ -650,13 +627,7 @@ pub fn get_neighbours<'syn, 'b>( v.rhs_parent_stack.push(PopEither), ) } else { - let mut next_syntax = rhs_syntax; - let mut next_stack = v.rhs_parent_stack; - while let (None, Some(PopEither)) = (next_syntax.next_sibling(), next_stack.peek()) { - next_syntax = next_syntax.parent().unwrap(); - next_stack = next_stack.pop(); - } - (next_sibling(next_syntax), next_stack) + skip_pop_either(rhs_syntax, v.rhs_parent_stack) }; add_neighbor( From edc5516eb90cc6c638e77140f2ec674378c04a73 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 11:12:27 +0800 Subject: [PATCH 40/58] Completely remove ExitDelimiter vertices --- src/diff/dijkstra.rs | 6 - src/diff/graph.rs | 263 ++++++++++++++++++++----------------------- 2 files changed, 119 insertions(+), 150 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 02fbd7e641..68f3e1ce66 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -253,7 +253,6 @@ mod tests { contiguous: false, probably_punctuation: false, }, - ExitDelimiter, ] ); } @@ -303,7 +302,6 @@ mod tests { contiguous: false, probably_punctuation: false }, - ExitDelimiter, ] ); } @@ -353,8 +351,6 @@ mod tests { UnchangedNode { depth_difference: 0 }, - ExitDelimiter, - ExitDelimiter, ], ); } @@ -434,7 +430,6 @@ mod tests { contiguous: true, probably_punctuation: false }, - ExitDelimiter, ] ); } @@ -476,7 +471,6 @@ mod tests { contiguous: true, probably_punctuation: false }, - ExitDelimiter, NovelAtomLHS { contiguous: true, probably_punctuation: true diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 1c8f90cead..860b94498d 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -315,7 +315,6 @@ pub enum Edge { EnterNovelDelimiterRHS { contiguous: bool, }, - ExitDelimiter, } const NOT_CONTIGUOUS_PENALTY: u64 = 50; @@ -323,12 +322,6 @@ const NOT_CONTIGUOUS_PENALTY: u64 = 50; impl Edge { pub fn cost(self) -> u64 { match self { - // When we're at the end of a list, there's only one exit - // delimiter possibility, so the cost doesn't matter. We - // choose a non-zero number as it's easier to reason - // about. - ExitDelimiter => 1, - // Matching nodes is always best. UnchangedNode { depth_difference } => min(40, u64::from(depth_difference) + 1), // Matching an outer delimiter is good. @@ -383,7 +376,6 @@ pub type SeenMap<'syn, 'b> = hashbrown::HashMap< BuildHasherDefault, >; -#[inline(never)] fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, @@ -417,15 +409,46 @@ fn looks_like_punctuation(content: &str) -> bool { content == "," || content == ";" || content == "." } -fn skip_pop_either<'a>( - mut syntax: &'a Syntax<'a>, - mut stack: BitStack, -) -> (SideSyntax<'a>, BitStack) { - while let (None, Some(PopEither)) = (syntax.next_sibling(), stack.peek()) { - syntax = syntax.parent().unwrap(); - stack = stack.pop(); +#[inline(always)] +fn next_vertex<'a, 'b>( + mut lhs_syntax: SideSyntax<'a>, + mut rhs_syntax: SideSyntax<'a>, + mut lhs_parent_stack: BitStack, + mut rhs_parent_stack: BitStack, +) -> Vertex<'a, 'b> { + loop { + while let (None, Some(PopEither)) = (lhs_syntax.get_side(), lhs_parent_stack.peek()) { + lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); + lhs_parent_stack = lhs_parent_stack.pop(); + } + + while let (None, Some(PopEither)) = (rhs_syntax.get_side(), rhs_parent_stack.peek()) { + rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); + rhs_parent_stack = rhs_parent_stack.pop(); + } + + if let (None, None, Some(PopBoth), Some(PopBoth)) = ( + lhs_syntax.get_side(), + rhs_syntax.get_side(), + lhs_parent_stack.peek(), + rhs_parent_stack.peek(), + ) { + lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); + rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); + lhs_parent_stack = lhs_parent_stack.pop(); + rhs_parent_stack = rhs_parent_stack.pop(); + } else { + break; + } + } + + Vertex { + lhs_syntax, + rhs_syntax, + lhs_parent_stack, + rhs_parent_stack, + ..Default::default() } - (next_sibling(syntax), stack) } /// Compute the neighbours of `v`. @@ -445,49 +468,23 @@ pub fn get_neighbours<'syn, 'b>( }, ); - let v_info = ( - v.lhs_syntax.get_side(), - v.rhs_syntax.get_side(), - v.lhs_parent_stack.peek(), - v.rhs_parent_stack.peek(), - ); - - if let (None, None, Some(PopBoth), Some(PopBoth)) = v_info { - // We have exhausted all the nodes on both lists, so we can - // move up to the parent node. - - // Continue from sibling of parent. - add_neighbor( - ExitDelimiter, - Vertex { - lhs_syntax: next_sibling(v.lhs_syntax.parent().unwrap()), - rhs_syntax: next_sibling(v.rhs_syntax.parent().unwrap()), - lhs_parent_stack: v.lhs_parent_stack.pop(), - rhs_parent_stack: v.rhs_parent_stack.pop(), - ..Vertex::default() - }, - ); - } + let v_info = (v.lhs_syntax.get_side(), v.rhs_syntax.get_side()); - if let (Some(lhs_syntax), Some(rhs_syntax), _, _) = v_info { + if let (Some(lhs_syntax), Some(rhs_syntax)) = v_info { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - let (next_lhs_syntax, next_lhs_stack) = skip_pop_either(lhs_syntax, v.lhs_parent_stack); - let (next_rhs_syntax, next_rhs_stack) = skip_pop_either(rhs_syntax, v.rhs_parent_stack); - // Both nodes are equal, the happy case. add_neighbor( UnchangedNode { depth_difference }, - Vertex { - lhs_syntax: next_lhs_syntax, - rhs_syntax: next_rhs_syntax, - lhs_parent_stack: next_lhs_stack, - rhs_parent_stack: next_rhs_stack, - ..Vertex::default() - }, + next_vertex( + next_sibling(lhs_syntax), + next_sibling(rhs_syntax), + v.lhs_parent_stack, + v.rhs_parent_stack, + ), ); } @@ -515,13 +512,12 @@ pub fn get_neighbours<'syn, 'b>( add_neighbor( EnterUnchangedDelimiter { depth_difference }, - Vertex { - lhs_syntax: next_child(lhs_syntax, lhs_children), - rhs_syntax: next_child(rhs_syntax, rhs_children), - lhs_parent_stack: v.lhs_parent_stack.push(PopBoth), - rhs_parent_stack: v.rhs_parent_stack.push(PopBoth), - ..Vertex::default() - }, + next_vertex( + next_child(lhs_syntax, lhs_children), + next_child(rhs_syntax, rhs_children), + v.lhs_parent_stack.push(PopBoth), + v.rhs_parent_stack.push(PopBoth), + ), ); } } @@ -545,101 +541,83 @@ pub fn get_neighbours<'syn, 'b>( let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - let (next_lhs_syntax, next_lhs_stack) = - skip_pop_either(lhs_syntax, v.lhs_parent_stack); - let (next_rhs_syntax, next_rhs_stack) = - skip_pop_either(rhs_syntax, v.rhs_parent_stack); - add_neighbor( ReplacedComment { levenshtein_pct }, - Vertex { - lhs_syntax: next_lhs_syntax, - rhs_syntax: next_rhs_syntax, - lhs_parent_stack: next_lhs_stack, - rhs_parent_stack: next_rhs_stack, - ..Vertex::default() - }, + next_vertex( + next_sibling(lhs_syntax), + next_sibling(rhs_syntax), + v.lhs_parent_stack, + v.rhs_parent_stack, + ), ); } } } - if let (Some(lhs_syntax), _, _, _) = v_info { - let (edge, child) = match lhs_syntax { - Syntax::Atom { content, .. } => ( - NovelAtomLHS { - // TODO: should this apply if prev is a parent - // node rather than a sibling? - contiguous: lhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - None, - ), - Syntax::List { children, .. } => ( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, - children.get(0).copied(), - ), - }; - - let (next_syntax, next_stack) = if let Some(child) = child { - ( - SideSyntax::from_side(child), - v.lhs_parent_stack.push(PopEither), - ) - } else { - skip_pop_either(lhs_syntax, v.lhs_parent_stack) + if let (Some(lhs_syntax), _) = v_info { + match lhs_syntax { + Syntax::Atom { content, .. } => { + add_neighbor( + NovelAtomLHS { + // TODO: should this apply if prev is a parent + // node rather than a sibling? + contiguous: lhs_syntax.prev_is_contiguous(), + probably_punctuation: looks_like_punctuation(content), + }, + next_vertex( + next_sibling(lhs_syntax), + v.rhs_syntax, + v.lhs_parent_stack, + v.rhs_parent_stack, + ), + ); + } + Syntax::List { children, .. } => { + add_neighbor( + EnterNovelDelimiterLHS { + contiguous: lhs_syntax.prev_is_contiguous(), + }, + next_vertex( + next_child(lhs_syntax, children), + v.rhs_syntax, + v.lhs_parent_stack.push(PopEither), + v.rhs_parent_stack, + ), + ); + } }; - - add_neighbor( - edge, - Vertex { - lhs_syntax: next_syntax, - rhs_syntax: v.rhs_syntax, - lhs_parent_stack: next_stack, - rhs_parent_stack: v.rhs_parent_stack, - ..Vertex::default() - }, - ); } - if let (_, Some(rhs_syntax), _, _) = v_info { - let (edge, child) = match rhs_syntax { - Syntax::Atom { content, .. } => ( - NovelAtomRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, - None, - ), - Syntax::List { children, .. } => ( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, - children.get(0).copied(), - ), - }; - - let (next_syntax, next_stack) = if let Some(child) = child { - ( - SideSyntax::from_side(child), - v.rhs_parent_stack.push(PopEither), - ) - } else { - skip_pop_either(rhs_syntax, v.rhs_parent_stack) + if let (_, Some(rhs_syntax)) = v_info { + match rhs_syntax { + Syntax::Atom { content, .. } => { + add_neighbor( + NovelAtomRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + probably_punctuation: looks_like_punctuation(content), + }, + next_vertex( + v.lhs_syntax, + next_sibling(rhs_syntax), + v.lhs_parent_stack, + v.rhs_parent_stack, + ), + ); + } + Syntax::List { children, .. } => { + add_neighbor( + EnterNovelDelimiterRHS { + contiguous: rhs_syntax.prev_is_contiguous(), + }, + next_vertex( + v.lhs_syntax, + next_child(rhs_syntax, children), + v.lhs_parent_stack, + v.rhs_parent_stack.push(PopEither), + ), + ); + } }; - - add_neighbor( - edge, - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_syntax, - lhs_parent_stack: v.lhs_parent_stack, - rhs_parent_stack: next_stack, - ..Vertex::default() - }, - ); } } @@ -651,9 +629,6 @@ pub fn populate_change_map<'a, 'b>( for (e, v) in route { match e { - ExitDelimiter => { - // Nothing to do: we have already marked this node when we entered it. - } UnchangedNode { .. } => { // No change on this node or its children. let lhs = v.lhs_syntax.get_side().unwrap(); From adf6077f6f5719d48c2a9fcabcabc2c9af68f48f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 7 Oct 2022 11:25:06 +0800 Subject: [PATCH 41/58] Add more #[inline(always)] --- src/diff/graph.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 860b94498d..2afad4d083 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -14,7 +14,6 @@ use crate::{ diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, parse::syntax::{AtomKind, Syntax}, }; -use Edge::*; /// Compress two `&Syntax` into a usize. /// @@ -82,17 +81,6 @@ impl<'a> SideSyntax<'a> { } } -fn next_sibling<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { - let parent = SideSyntax::from_parent(syntax.parent()); - syntax.next_sibling().map_or(parent, SideSyntax::from_side) -} - -fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { - let parent = SideSyntax::from_parent(Some(syntax)); - let child = children.get(0).copied(); - child.map_or(parent, SideSyntax::from_side) -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u64)] enum EnteredDelimiter { @@ -317,6 +305,8 @@ pub enum Edge { }, } +use Edge::*; + const NOT_CONTIGUOUS_PENALTY: u64 = 50; impl Edge { @@ -409,6 +399,19 @@ fn looks_like_punctuation(content: &str) -> bool { content == "," || content == ";" || content == "." } +#[inline(always)] +fn next_sibling<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { + let parent = SideSyntax::from_parent(syntax.parent()); + syntax.next_sibling().map_or(parent, SideSyntax::from_side) +} + +#[inline(always)] +fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { + let parent = SideSyntax::from_parent(Some(syntax)); + let child = children.get(0).copied(); + child.map_or(parent, SideSyntax::from_side) +} + #[inline(always)] fn next_vertex<'a, 'b>( mut lhs_syntax: SideSyntax<'a>, From 1ede2731ff6c6e2c3a1d1edef441f661f73f57d3 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 11 Oct 2022 14:27:33 +0800 Subject: [PATCH 42/58] Remove root field from SyntaxInfo --- src/diff/graph.rs | 12 ------------ src/parse/syntax.rs | 26 -------------------------- 2 files changed, 38 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 2afad4d083..f697767b6c 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -52,16 +52,6 @@ impl<'a> SideSyntax<'a> { } } - pub fn root(&self) -> Option<&'a Syntax<'a>> { - match self.get_side() { - Some(side) => Some(side.root()), - None => match self.data ^ 1 { - 0 => None, - d => Some(unsafe { &*(d as *const Syntax<'a>) }.root()), - }, - } - } - pub fn from_side(side: &'a Syntax<'a>) -> Self { Self { data: side as *const _ as _, @@ -209,8 +199,6 @@ impl<'a, 'b> Vertex<'a, 'b> { fn parents_eq(&self, other: &Vertex<'a, 'b>) -> bool { self.lhs_parent_stack == other.lhs_parent_stack && self.rhs_parent_stack == other.rhs_parent_stack - && self.lhs_syntax.root() == other.lhs_syntax.root() - && self.rhs_syntax.root() == other.rhs_syntax.root() } fn can_pop_either(&self) -> bool { diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index d1aa3cbacc..b1829fbeb8 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -47,8 +47,6 @@ pub struct SyntaxInfo<'a> { prev: Cell>>, /// The parent syntax node, if present. parent: Cell>>, - /// The root syntax node. - root: Cell>>, /// Does the previous syntax node occur on the same line as the /// first line of this node? prev_is_contiguous: Cell, @@ -75,7 +73,6 @@ impl<'a> SyntaxInfo<'a> { next_sibling: Cell::new(None), prev: Cell::new(None), parent: Cell::new(None), - root: Cell::new(None), prev_is_contiguous: Cell::new(false), num_ancestors: Cell::new(0), num_after: Cell::new(0), @@ -271,10 +268,6 @@ impl<'a> Syntax<'a> { self.info().parent.get() } - pub fn root(&self) -> &'a Syntax<'a> { - self.info().root.get().unwrap() - } - pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> { self.info().next_sibling.get() } @@ -432,7 +425,6 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) { } } pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { - set_root(roots); set_prev_sibling(roots); set_next_sibling(roots); set_prev(roots, None); @@ -536,24 +528,6 @@ fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { } } -fn set_root<'a>(nodes: &[&'a Syntax<'a>]) { - fn inner<'a>(nodes: &[&'a Syntax<'a>], root: &'a Syntax<'a>) { - for node in nodes { - node.info().root.set(Some(root)); - if let List { children, .. } = node { - inner(children, root); - } - } - } - - for node in nodes { - node.info().root.set(Some(node)); - if let List { children, .. } = node { - inner(children, node); - } - } -} - fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) { for node in nodes { node.info().num_ancestors.set(num_ancestors); From bc09acca46c81bb8e4e364b565b08b58f0780600 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 12 Nov 2022 20:59:55 +0800 Subject: [PATCH 43/58] Fix typo --- src/diff/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f697767b6c..7febe65894 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -90,7 +90,7 @@ enum EnteredDelimiter { use EnteredDelimiter::*; -/// LSP is the stack top. One bit represents one `EnteredDelimiter`. +/// LSB is the stack top. One bit represents one `EnteredDelimiter`. /// /// Assume the underlying type is u8, then /// From b95c3a6cf788c71bbfbb8ad58e295e88e56dd92d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 13 Nov 2022 03:32:55 +0800 Subject: [PATCH 44/58] Refactor parent stack --- src/diff/graph.rs | 181 ++++++++++++++++++---------------------------- 1 file changed, 72 insertions(+), 109 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7febe65894..186f98b192 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -71,70 +71,8 @@ impl<'a> SideSyntax<'a> { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u64)] -enum EnteredDelimiter { - /// If we've entered the LHS or RHS separately, we can pop either - /// side independently. - PopEither = 0, - /// If we've entered the LHS and RHS together, we must pop both - /// sides together too. Otherwise we'd consider the following - /// case to have no changes. - /// - /// ```text - /// Old: (a b c) - /// New: (a b) c - /// ``` - PopBoth = 1, -} - -use EnteredDelimiter::*; - -/// LSB is the stack top. One bit represents one `EnteredDelimiter`. -/// -/// Assume the underlying type is u8, then -/// -/// ```text -/// new: 0b00000001 -/// push x: 0b0000001x -/// push y: 0b000001xy -/// pop: 0b0000001x -/// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -struct BitStack(u64); - -impl BitStack { - fn new() -> Self { - Self(1) - } - - fn is_empty(&self) -> bool { - self.0 == 1 - } - - fn is_full(&self) -> bool { - self.0 & (1 << 63) != 0 - } - - fn peek(&self) -> Option { - if self.is_empty() { - None - } else if self.0 & 1 == PopEither as u64 { - Some(PopEither) - } else { - Some(PopBoth) - } - } - - fn push(&self, tag: EnteredDelimiter) -> Self { - assert!(!self.is_full(), "BitStack is full!"); - Self((self.0 << 1) | tag as u64) - } - - fn pop(&self) -> Self { - assert!(!self.is_empty(), "BitStack is empty!"); - Self(self.0 >> 1) - } +fn get_ptr(opt: Option<&T>) -> *const T { + opt.map_or(std::ptr::null::(), |t| t as *const _) } /// A vertex in a directed acyclic graph that represents a diff. @@ -167,21 +105,36 @@ impl BitStack { /// ``` #[derive(Debug)] pub struct Vertex<'a, 'b> { + // states pub is_visited: Cell, pub shortest_distance: Cell, pub predecessor: Cell)>>, + + // core info pub lhs_syntax: SideSyntax<'a>, pub rhs_syntax: SideSyntax<'a>, - lhs_parent_stack: BitStack, - rhs_parent_stack: BitStack, + /// If we've entered the LHS and RHS together, we must pop both + /// sides together too. Otherwise we'd consider the following + /// case to have no changes. + /// + /// ```text + /// Old: (a b c) + /// New: (a b) c + /// ``` + pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, + /// If we've entered the LHS or RHS separately, we can pop either + /// side independently. + pop_lhs_cnt: u16, + pop_rhs_cnt: u16, } impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { !self.lhs_syntax.is_side() && !self.rhs_syntax.is_side() - && self.lhs_parent_stack.is_empty() - && self.rhs_parent_stack.is_empty() + && self.pop_both_ancestor.is_none() + && self.pop_lhs_cnt == 0 + && self.pop_rhs_cnt == 0 } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { @@ -191,22 +144,21 @@ impl<'a, 'b> Vertex<'a, 'b> { predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), - lhs_parent_stack: BitStack::new(), - rhs_parent_stack: BitStack::new(), + pop_both_ancestor: None, + pop_lhs_cnt: 0, + pop_rhs_cnt: 0, } } - fn parents_eq(&self, other: &Vertex<'a, 'b>) -> bool { - self.lhs_parent_stack == other.lhs_parent_stack - && self.rhs_parent_stack == other.rhs_parent_stack + fn parent_stack_eq(&self, other: &Vertex<'a, 'b>) -> bool { + // Vertices are pinned so ptrs are unique IDs + get_ptr(self.pop_both_ancestor) == get_ptr(other.pop_both_ancestor) + && self.pop_lhs_cnt == other.pop_lhs_cnt + && self.pop_rhs_cnt == other.pop_rhs_cnt } fn can_pop_either(&self) -> bool { - // self.lhs_parent_stack.peek() == Some(PopEither) - // || self.rhs_parent_stack.peek() == Some(PopEither) - - // Utilize the fact that PopEither = 0 while stack top indicator is 1. - self.lhs_parent_stack.0 & 1 == 0 || self.rhs_parent_stack.0 & 1 == 0 + self.pop_lhs_cnt != 0 || self.pop_rhs_cnt != 0 } } @@ -364,7 +316,7 @@ fn allocate_if_new<'syn, 'b>( if let Some(existing) = value { return existing; } - if key.parents_eq(&v) { + if key.parent_stack_eq(&v) { return key; } let allocated = alloc.alloc(v); @@ -404,30 +356,33 @@ fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSy fn next_vertex<'a, 'b>( mut lhs_syntax: SideSyntax<'a>, mut rhs_syntax: SideSyntax<'a>, - mut lhs_parent_stack: BitStack, - mut rhs_parent_stack: BitStack, + mut pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, + mut pop_lhs_cnt: u16, + mut pop_rhs_cnt: u16, ) -> Vertex<'a, 'b> { loop { - while let (None, Some(PopEither)) = (lhs_syntax.get_side(), lhs_parent_stack.peek()) { + while !lhs_syntax.is_side() && pop_lhs_cnt > 0 { lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); - lhs_parent_stack = lhs_parent_stack.pop(); + pop_lhs_cnt -= 1; } - while let (None, Some(PopEither)) = (rhs_syntax.get_side(), rhs_parent_stack.peek()) { + while !rhs_syntax.is_side() && pop_rhs_cnt > 0 { rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); - rhs_parent_stack = rhs_parent_stack.pop(); + pop_rhs_cnt -= 1; } - if let (None, None, Some(PopBoth), Some(PopBoth)) = ( - lhs_syntax.get_side(), - rhs_syntax.get_side(), - lhs_parent_stack.peek(), - rhs_parent_stack.peek(), + if let (false, false, Some(ancestor), 0, 0) = ( + lhs_syntax.is_side(), + rhs_syntax.is_side(), + pop_both_ancestor, + pop_lhs_cnt, + pop_rhs_cnt, ) { lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); - lhs_parent_stack = lhs_parent_stack.pop(); - rhs_parent_stack = rhs_parent_stack.pop(); + pop_both_ancestor = ancestor.pop_both_ancestor; + pop_lhs_cnt = ancestor.pop_lhs_cnt; + pop_rhs_cnt = ancestor.pop_rhs_cnt; } else { break; } @@ -436,15 +391,16 @@ fn next_vertex<'a, 'b>( Vertex { lhs_syntax, rhs_syntax, - lhs_parent_stack, - rhs_parent_stack, + pop_both_ancestor, + pop_lhs_cnt, + pop_rhs_cnt, ..Default::default() } } /// Compute the neighbours of `v`. pub fn get_neighbours<'syn, 'b>( - v: &Vertex<'syn, 'b>, + v: &'b Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, neighbors: &mut Vec<(Edge, &'b Vertex<'syn, 'b>)>, @@ -473,8 +429,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( next_sibling(lhs_syntax), next_sibling(rhs_syntax), - v.lhs_parent_stack, - v.rhs_parent_stack, + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, ), ); } @@ -506,8 +463,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( next_child(lhs_syntax, lhs_children), next_child(rhs_syntax, rhs_children), - v.lhs_parent_stack.push(PopBoth), - v.rhs_parent_stack.push(PopBoth), + Some(v), + 0, + 0, ), ); } @@ -537,8 +495,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( next_sibling(lhs_syntax), next_sibling(rhs_syntax), - v.lhs_parent_stack, - v.rhs_parent_stack, + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, ), ); } @@ -558,8 +517,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( next_sibling(lhs_syntax), v.rhs_syntax, - v.lhs_parent_stack, - v.rhs_parent_stack, + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, ), ); } @@ -571,8 +531,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( next_child(lhs_syntax, children), v.rhs_syntax, - v.lhs_parent_stack.push(PopEither), - v.rhs_parent_stack, + v.pop_both_ancestor, + v.pop_lhs_cnt + 1, + v.pop_rhs_cnt, ), ); } @@ -590,8 +551,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( v.lhs_syntax, next_sibling(rhs_syntax), - v.lhs_parent_stack, - v.rhs_parent_stack, + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, ), ); } @@ -603,8 +565,9 @@ pub fn get_neighbours<'syn, 'b>( next_vertex( v.lhs_syntax, next_child(rhs_syntax, children), - v.lhs_parent_stack, - v.rhs_parent_stack.push(PopEither), + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt + 1, ), ); } From 153502ef5cef836fd73839c4a80aabafb02f3f26 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 13 Nov 2022 23:50:57 +0800 Subject: [PATCH 45/58] Optimize from_parent --- src/diff/graph.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 186f98b192..23c5f8777d 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -60,12 +60,8 @@ impl<'a> SideSyntax<'a> { } pub fn from_parent(parent: Option<&'a Syntax<'a>>) -> Self { - let data = match parent { - Some(p) => p as *const _ as usize | 1, - None => 1, - }; Self { - data, + data: parent.map_or(0, |s| s as *const _ as usize) | 1, phantom: PhantomData, } } From cea109ecb671bedd9dc8c2f3dcb7c511cadcfd6a Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Mon, 14 Nov 2022 00:42:04 +0800 Subject: [PATCH 46/58] Use insert_unique_unchecked --- src/diff/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 23c5f8777d..b769bb91cd 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -321,7 +321,7 @@ fn allocate_if_new<'syn, 'b>( } None => { let allocated = alloc.alloc(v); - seen.insert(allocated, None); + seen.insert_unique_unchecked(allocated, None); allocated } } From 0eb70c84acc469fdc9eec787a7dd0e80dc63ab91 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sun, 20 Nov 2022 22:38:52 +0800 Subject: [PATCH 47/58] Fix arena reservation --- src/diff/dijkstra.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 68f3e1ce66..dbca6ac02e 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -123,7 +123,7 @@ pub fn mark_syntax<'a>( let size_hint = lhs_node_count * rhs_node_count; let start = Vertex::new(lhs_syntax, rhs_syntax); - let vertex_arena = Bump::with_capacity(size_hint); + let vertex_arena = Bump::with_capacity(size_hint * std::mem::size_of::()); let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?; From a0c32931cfe14bf5f22ff680325e32fd2d85e12a Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Mon, 21 Nov 2022 17:22:19 +0800 Subject: [PATCH 48/58] Reduce branches of get_neighbours --- src/diff/graph.rs | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index b769bb91cd..bd7625e013 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -430,9 +430,7 @@ pub fn get_neighbours<'syn, 'b>( v.pop_rhs_cnt, ), ); - } - - if let ( + } else if let ( Syntax::List { open_content: lhs_open_content, close_content: lhs_close_content, @@ -465,9 +463,7 @@ pub fn get_neighbours<'syn, 'b>( ), ); } - } - - if let ( + } else if let ( Syntax::Atom { content: lhs_content, kind: AtomKind::Comment, @@ -480,23 +476,20 @@ pub fn get_neighbours<'syn, 'b>( }, ) = (lhs_syntax, rhs_syntax) { - // Both sides are comments and their content is reasonably - // similar. - if lhs_content != rhs_content { - let levenshtein_pct = - (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; + // Both sides are comments and their content is reasonably similar. + let levenshtein_pct = + (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - add_neighbor( - ReplacedComment { levenshtein_pct }, - next_vertex( - next_sibling(lhs_syntax), - next_sibling(rhs_syntax), - v.pop_both_ancestor, - v.pop_lhs_cnt, - v.pop_rhs_cnt, - ), - ); - } + add_neighbor( + ReplacedComment { levenshtein_pct }, + next_vertex( + next_sibling(lhs_syntax), + next_sibling(rhs_syntax), + v.pop_both_ancestor, + v.pop_lhs_cnt, + v.pop_rhs_cnt, + ), + ); } } From afe07ea55faf02d8d4bed1f48cf7c71f389e6cb7 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 02:58:33 +0800 Subject: [PATCH 49/58] Pre-compute edge cost to reduce the size of Vertex --- src/diff/dijkstra.rs | 130 ++++++------------------------ src/diff/graph.rs | 183 +++++++++++++++++-------------------------- 2 files changed, 95 insertions(+), 218 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index dbca6ac02e..7bec84eb8d 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -52,12 +52,13 @@ fn shortest_path<'a, 'b>( neighbors.clear(); get_neighbours(current, vertex_arena, &mut seen, &mut neighbors); - for &(edge, next) in neighbors.iter() { - let distance_to_next = distance + edge.cost(); + for &(edge, cost, next) in neighbors.iter() { + let distance_to_next = distance + cost; if distance_to_next < next.shortest_distance.get() { next.shortest_distance.set(distance_to_next); - next.predecessor.set(Some((edge, current))); + next.last_edge.set(Some(edge)); + next.predecessor.set(Some(current)); heap.push(Reverse(distance_to_next), next); } } @@ -77,8 +78,11 @@ fn shortest_path<'a, 'b>( heap.len(), ); - let mut vertex_route = - successors(end.predecessor.get(), |&(_, node)| node.predecessor.get()).collect::>(); + let mut vertex_route = successors( + end.last_edge.get().zip(end.predecessor.get()), + |&(_, node)| node.last_edge.get().zip(node.predecessor.get()), + ) + .collect::>(); vertex_route.reverse(); Ok(vertex_route) } @@ -139,14 +143,13 @@ pub fn mark_syntax<'a>( .iter() .map(|x| { format!( - "{:20} {:20} --- {:3} {:?}", + "{:20} {:20} --- {:?}", x.1.lhs_syntax .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), x.1.rhs_syntax .get_side() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.0.cost(), x.0, ) }) @@ -202,12 +205,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![UnchangedNode { - depth_difference: 0 - }] - ); + assert_eq!(actions, vec![UnchangedNode]); } #[test] @@ -243,18 +241,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterUnchangedDelimiter { - depth_difference: 0 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false, - }, - ] - ); + assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelAtomLHS,]); } #[test] @@ -290,19 +277,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![ - EnterUnchangedDelimiter { - depth_difference: 0 - }, - NovelAtomRHS { - contiguous: false, - probably_punctuation: false - }, - NovelAtomRHS { - contiguous: false, - probably_punctuation: false - }, - ] + vec![EnterUnchangedDelimiter, NovelAtomRHS, NovelAtomRHS,] ); } @@ -343,14 +318,10 @@ mod tests { assert_eq!( actions, vec![ - EnterNovelDelimiterRHS { contiguous: false }, - EnterNovelDelimiterLHS { contiguous: false }, - UnchangedNode { - depth_difference: 0 - }, - UnchangedNode { - depth_difference: 0 - }, + EnterNovelDelimiterRHS, + EnterNovelDelimiterLHS, + UnchangedNode, + UnchangedNode, ], ); } @@ -378,22 +349,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - UnchangedNode { - depth_difference: 0 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false - }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - ] - ); + assert_eq!(actions, vec![UnchangedNode, NovelAtomLHS, NovelAtomLHS,]); } #[test] @@ -422,16 +378,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - EnterNovelDelimiterLHS { contiguous: false }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - ] - ); + assert_eq!(actions, vec![EnterNovelDelimiterLHS, NovelAtomLHS,]); } #[test] @@ -465,17 +412,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![ - EnterNovelDelimiterLHS { contiguous: false }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: false - }, - NovelAtomLHS { - contiguous: true, - probably_punctuation: true - }, - ] + vec![EnterNovelDelimiterLHS, NovelAtomLHS, NovelAtomLHS,] ); } @@ -503,12 +440,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ReplacedComment { - levenshtein_pct: 84 - }] - ); + assert_eq!(actions, vec![ReplacedComment]); } #[test] @@ -535,12 +467,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ReplacedComment { - levenshtein_pct: 11 - }] - ); + assert_eq!(actions, vec![NovelComment]); } #[test] @@ -575,18 +502,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![ - ReplacedComment { - levenshtein_pct: 95 - }, - NovelAtomLHS { - contiguous: false, - probably_punctuation: false - } - ] - ); + assert_eq!(actions, vec![ReplacedComment, NovelAtomLHS]); } #[test] diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bd7625e013..a3394e2446 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -104,7 +104,8 @@ pub struct Vertex<'a, 'b> { // states pub is_visited: Cell, pub shortest_distance: Cell, - pub predecessor: Cell)>>, + pub last_edge: Cell>, + pub predecessor: Cell>>, // core info pub lhs_syntax: SideSyntax<'a>, @@ -137,6 +138,7 @@ impl<'a, 'b> Vertex<'a, 'b> { Vertex { is_visited: Cell::new(false), shortest_distance: Cell::new(u64::MAX), + last_edge: Cell::new(None), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), @@ -214,88 +216,22 @@ impl Default for Vertex<'_, '_> { /// See [`neighbours`] for all the edges available for a given `Vertex`. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum Edge { - UnchangedNode { - depth_difference: u32, - }, - EnterUnchangedDelimiter { - depth_difference: u32, - }, - ReplacedComment { - levenshtein_pct: u8, - }, - NovelAtomLHS { - contiguous: bool, - probably_punctuation: bool, - }, - NovelAtomRHS { - contiguous: bool, - probably_punctuation: bool, - }, + UnchangedNode, + EnterUnchangedDelimiter, + ReplacedComment, + NovelComment, + NovelAtomLHS, + NovelAtomRHS, // TODO: An EnterNovelDelimiterBoth edge might help performance // rather doing LHS and RHS separately. - EnterNovelDelimiterLHS { - contiguous: bool, - }, - EnterNovelDelimiterRHS { - contiguous: bool, - }, + EnterNovelDelimiterLHS, + EnterNovelDelimiterRHS, } use Edge::*; const NOT_CONTIGUOUS_PENALTY: u64 = 50; -impl Edge { - pub fn cost(self) -> u64 { - match self { - // Matching nodes is always best. - UnchangedNode { depth_difference } => min(40, u64::from(depth_difference) + 1), - // Matching an outer delimiter is good. - EnterUnchangedDelimiter { depth_difference } => { - 100 + min(40, u64::from(depth_difference)) - } - - // Replacing a comment is better than treating it as novel. - ReplacedComment { levenshtein_pct } => 150 + u64::from(100 - levenshtein_pct), - - // Otherwise, we've added/removed a node. - NovelAtomLHS { - contiguous, - probably_punctuation, - } - | NovelAtomRHS { - contiguous, - probably_punctuation, - } => { - let mut cost = 300; - if !contiguous { - cost += NOT_CONTIGUOUS_PENALTY; - } - // If it's only punctuation, decrease the cost - // slightly. It's better to have novel punctuation - // than novel variable names. - if probably_punctuation { - cost -= 10; - } - cost - } - EnterNovelDelimiterLHS { contiguous } | EnterNovelDelimiterRHS { contiguous } => { - let mut cost = 300; - if !contiguous { - // This needs to be more than 40 greater than the - // contiguous case. Otherwise, we end up choosing - // a badly positioned unchanged delimiter just - // because it has a better depth difference. - // - // TODO: write a test for this case. - cost += NOT_CONTIGUOUS_PENALTY; - } - cost - } - } - } -} - pub type SeenMap<'syn, 'b> = hashbrown::HashMap< &'b Vertex<'syn, 'b>, Option<&'b Vertex<'syn, 'b>>, @@ -399,14 +335,14 @@ pub fn get_neighbours<'syn, 'b>( v: &'b Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, - neighbors: &mut Vec<(Edge, &'b Vertex<'syn, 'b>)>, + neighbors: &mut Vec<(Edge, u64, &'b Vertex<'syn, 'b>)>, ) { let mut add_neighbor = std::convert::identity( #[inline(always)] - |edge, vertex| { + |edge, cost, vertex| { let neighbor = allocate_if_new(vertex, alloc, seen); if !neighbor.is_visited.get() { - neighbors.push((edge, neighbor)); + neighbors.push((edge, cost, neighbor)); } }, ); @@ -415,13 +351,15 @@ pub fn get_neighbours<'syn, 'b>( if let (Some(lhs_syntax), Some(rhs_syntax)) = v_info { if lhs_syntax == rhs_syntax { + // Both nodes are equal, the happy case. let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); + let cost = min(40, u64::from(depth_difference) + 1); - // Both nodes are equal, the happy case. add_neighbor( - UnchangedNode { depth_difference }, + UnchangedNode, + cost, next_vertex( next_sibling(lhs_syntax), next_sibling(rhs_syntax), @@ -451,9 +389,11 @@ pub fn get_neighbours<'syn, 'b>( let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); + let cost = 100 + min(40, u64::from(depth_difference)); add_neighbor( - EnterUnchangedDelimiter { depth_difference }, + EnterUnchangedDelimiter, + cost, next_vertex( next_child(lhs_syntax, lhs_children), next_child(rhs_syntax, rhs_children), @@ -479,9 +419,15 @@ pub fn get_neighbours<'syn, 'b>( // Both sides are comments and their content is reasonably similar. let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; + let cost = 150 + u64::from(100 - levenshtein_pct); add_neighbor( - ReplacedComment { levenshtein_pct }, + if levenshtein_pct > 40 { + ReplacedComment + } else { + NovelComment + }, + cost, next_vertex( next_sibling(lhs_syntax), next_sibling(rhs_syntax), @@ -494,15 +440,25 @@ pub fn get_neighbours<'syn, 'b>( } if let (Some(lhs_syntax), _) = v_info { + let mut cost = 300; + // TODO: should this apply if prev is a parent + // node rather than a sibling? + if !lhs_syntax.prev_is_contiguous() { + cost += NOT_CONTIGUOUS_PENALTY; + } + match lhs_syntax { Syntax::Atom { content, .. } => { + // If it's only punctuation, decrease the cost + // slightly. It's better to have novel punctuation + // than novel variable names. + if looks_like_punctuation(content) { + cost -= 10; + } + add_neighbor( - NovelAtomLHS { - // TODO: should this apply if prev is a parent - // node rather than a sibling? - contiguous: lhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, + NovelAtomLHS, + cost, next_vertex( next_sibling(lhs_syntax), v.rhs_syntax, @@ -514,9 +470,8 @@ pub fn get_neighbours<'syn, 'b>( } Syntax::List { children, .. } => { add_neighbor( - EnterNovelDelimiterLHS { - contiguous: lhs_syntax.prev_is_contiguous(), - }, + EnterNovelDelimiterLHS, + cost, next_vertex( next_child(lhs_syntax, children), v.rhs_syntax, @@ -530,13 +485,20 @@ pub fn get_neighbours<'syn, 'b>( } if let (_, Some(rhs_syntax)) = v_info { + let mut cost = 300; + if !rhs_syntax.prev_is_contiguous() { + cost += NOT_CONTIGUOUS_PENALTY; + } + match rhs_syntax { Syntax::Atom { content, .. } => { + if looks_like_punctuation(content) { + cost -= 10; + } + add_neighbor( - NovelAtomRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - probably_punctuation: looks_like_punctuation(content), - }, + NovelAtomRHS, + cost, next_vertex( v.lhs_syntax, next_sibling(rhs_syntax), @@ -548,9 +510,8 @@ pub fn get_neighbours<'syn, 'b>( } Syntax::List { children, .. } => { add_neighbor( - EnterNovelDelimiterRHS { - contiguous: rhs_syntax.prev_is_contiguous(), - }, + EnterNovelDelimiterRHS, + cost, next_vertex( v.lhs_syntax, next_child(rhs_syntax, children), @@ -572,14 +533,14 @@ pub fn populate_change_map<'a, 'b>( for (e, v) in route { match e { - UnchangedNode { .. } => { + UnchangedNode => { // No change on this node or its children. let lhs = v.lhs_syntax.get_side().unwrap(); let rhs = v.rhs_syntax.get_side().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); } - EnterUnchangedDelimiter { .. } => { + EnterUnchangedDelimiter => { // No change on the outer delimiter, but children may // have changed. let lhs = v.lhs_syntax.get_side().unwrap(); @@ -587,23 +548,23 @@ pub fn populate_change_map<'a, 'b>( change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } - ReplacedComment { levenshtein_pct } => { + ReplacedComment => { let lhs = v.lhs_syntax.get_side().unwrap(); let rhs = v.rhs_syntax.get_side().unwrap(); - - if *levenshtein_pct > 40 { - change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); - change_map.insert(rhs, ChangeKind::ReplacedComment(rhs, lhs)); - } else { - change_map.insert(lhs, ChangeKind::Novel); - change_map.insert(rhs, ChangeKind::Novel); - } + change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); + change_map.insert(rhs, ChangeKind::ReplacedComment(rhs, lhs)); + } + NovelComment => { + let lhs = v.lhs_syntax.get_side().unwrap(); + let rhs = v.rhs_syntax.get_side().unwrap(); + change_map.insert(lhs, ChangeKind::Novel); + change_map.insert(rhs, ChangeKind::Novel); } - NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => { + NovelAtomLHS | EnterNovelDelimiterLHS => { let lhs = v.lhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } - NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => { + NovelAtomRHS | EnterNovelDelimiterRHS => { let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } From d7d16533659916a2eccfc0a9c17df8d2a1cf9fd7 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 03:10:36 +0800 Subject: [PATCH 50/58] Merge similar code logic --- src/diff/dijkstra.rs | 16 ++--- src/diff/graph.rs | 151 ++++++++++++++++++------------------------- 2 files changed, 72 insertions(+), 95 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 7bec84eb8d..990623b05b 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -241,7 +241,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelAtomLHS,]); + assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelLhs,]); } #[test] @@ -277,7 +277,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![EnterUnchangedDelimiter, NovelAtomRHS, NovelAtomRHS,] + vec![EnterUnchangedDelimiter, NovelRhs, NovelRhs,] ); } @@ -318,8 +318,8 @@ mod tests { assert_eq!( actions, vec![ - EnterNovelDelimiterRHS, - EnterNovelDelimiterLHS, + NovelRhs, + NovelLhs, UnchangedNode, UnchangedNode, ], @@ -349,7 +349,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!(actions, vec![UnchangedNode, NovelAtomLHS, NovelAtomLHS,]); + assert_eq!(actions, vec![UnchangedNode, NovelLhs, NovelLhs,]); } #[test] @@ -378,7 +378,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!(actions, vec![EnterNovelDelimiterLHS, NovelAtomLHS,]); + assert_eq!(actions, vec![NovelLhs, NovelLhs,]); } #[test] @@ -412,7 +412,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![EnterNovelDelimiterLHS, NovelAtomLHS, NovelAtomLHS,] + vec![NovelLhs, NovelLhs, NovelLhs,] ); } @@ -502,7 +502,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!(actions, vec![ReplacedComment, NovelAtomLHS]); + assert_eq!(actions, vec![ReplacedComment, NovelLhs]); } #[test] diff --git a/src/diff/graph.rs b/src/diff/graph.rs index a3394e2446..0a25c88cc9 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -84,7 +84,7 @@ fn get_ptr(opt: Option<&T>) -> *const T { /// ^ ^ /// ``` /// -/// From this vertex, we could take [`Edge::NovelAtomLHS`], bringing +/// From this vertex, we could take [`Edge::NovelLhs`], bringing /// us to this vertex. /// /// ```text @@ -92,7 +92,7 @@ fn get_ptr(opt: Option<&T>) -> *const T { /// ^ ^ /// ``` /// -/// Alternatively, we could take the [`Edge::NovelAtomRHS`], bringing us +/// Alternatively, we could take the [`Edge::NovelRhs`], bringing us /// to this vertex. /// /// ```text @@ -220,12 +220,10 @@ pub enum Edge { EnterUnchangedDelimiter, ReplacedComment, NovelComment, - NovelAtomLHS, - NovelAtomRHS, + NovelLhs, + NovelRhs, // TODO: An EnterNovelDelimiterBoth edge might help performance // rather doing LHS and RHS separately. - EnterNovelDelimiterLHS, - EnterNovelDelimiterRHS, } use Edge::*; @@ -330,6 +328,38 @@ fn next_vertex<'a, 'b>( } } +#[inline(always)] +fn next_novel<'a>(syntax: &'a Syntax<'a>, pop_cnt: u16) -> (u64, SideSyntax<'a>, u16) { + let mut cost = 300; + // TODO: should this apply if prev is a parent + // node rather than a sibling? + if !syntax.prev_is_contiguous() { + cost += NOT_CONTIGUOUS_PENALTY; + } + + let next_syntax; + let next_pop_cnt; + + match syntax { + Syntax::Atom { content, .. } => { + // If it's only punctuation, decrease the cost + // slightly. It's better to have novel punctuation + // than novel variable names. + if looks_like_punctuation(content) { + cost -= 10; + } + next_syntax = next_sibling(syntax); + next_pop_cnt = pop_cnt; + } + Syntax::List { children, .. } => { + next_syntax = next_child(syntax, children); + next_pop_cnt = pop_cnt + 1; + } + }; + + (cost, next_syntax, next_pop_cnt) +} + /// Compute the neighbours of `v`. pub fn get_neighbours<'syn, 'b>( v: &'b Vertex<'syn, 'b>, @@ -440,88 +470,35 @@ pub fn get_neighbours<'syn, 'b>( } if let (Some(lhs_syntax), _) = v_info { - let mut cost = 300; - // TODO: should this apply if prev is a parent - // node rather than a sibling? - if !lhs_syntax.prev_is_contiguous() { - cost += NOT_CONTIGUOUS_PENALTY; - } - - match lhs_syntax { - Syntax::Atom { content, .. } => { - // If it's only punctuation, decrease the cost - // slightly. It's better to have novel punctuation - // than novel variable names. - if looks_like_punctuation(content) { - cost -= 10; - } - - add_neighbor( - NovelAtomLHS, - cost, - next_vertex( - next_sibling(lhs_syntax), - v.rhs_syntax, - v.pop_both_ancestor, - v.pop_lhs_cnt, - v.pop_rhs_cnt, - ), - ); - } - Syntax::List { children, .. } => { - add_neighbor( - EnterNovelDelimiterLHS, - cost, - next_vertex( - next_child(lhs_syntax, children), - v.rhs_syntax, - v.pop_both_ancestor, - v.pop_lhs_cnt + 1, - v.pop_rhs_cnt, - ), - ); - } - }; + let (cost, next_syntax, next_pop_cnt) = next_novel(lhs_syntax, v.pop_lhs_cnt); + + add_neighbor( + NovelLhs, + cost, + next_vertex( + next_syntax, + v.rhs_syntax, + v.pop_both_ancestor, + next_pop_cnt, + v.pop_rhs_cnt, + ), + ); } if let (_, Some(rhs_syntax)) = v_info { - let mut cost = 300; - if !rhs_syntax.prev_is_contiguous() { - cost += NOT_CONTIGUOUS_PENALTY; - } - - match rhs_syntax { - Syntax::Atom { content, .. } => { - if looks_like_punctuation(content) { - cost -= 10; - } - - add_neighbor( - NovelAtomRHS, - cost, - next_vertex( - v.lhs_syntax, - next_sibling(rhs_syntax), - v.pop_both_ancestor, - v.pop_lhs_cnt, - v.pop_rhs_cnt, - ), - ); - } - Syntax::List { children, .. } => { - add_neighbor( - EnterNovelDelimiterRHS, - cost, - next_vertex( - v.lhs_syntax, - next_child(rhs_syntax, children), - v.pop_both_ancestor, - v.pop_lhs_cnt, - v.pop_rhs_cnt + 1, - ), - ); - } - }; + let (cost, next_syntax, next_pop_cnt) = next_novel(rhs_syntax, v.pop_rhs_cnt); + + add_neighbor( + NovelRhs, + cost, + next_vertex( + v.lhs_syntax, + next_syntax, + v.pop_both_ancestor, + v.pop_lhs_cnt, + next_pop_cnt, + ), + ); } } @@ -560,11 +537,11 @@ pub fn populate_change_map<'a, 'b>( change_map.insert(lhs, ChangeKind::Novel); change_map.insert(rhs, ChangeKind::Novel); } - NovelAtomLHS | EnterNovelDelimiterLHS => { + NovelLhs => { let lhs = v.lhs_syntax.get_side().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } - NovelAtomRHS | EnterNovelDelimiterRHS => { + NovelRhs => { let rhs = v.rhs_syntax.get_side().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } From 96a779c68a52d4f12e03e63954bfcef09109c16c Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 03:49:36 +0800 Subject: [PATCH 51/58] Simplify code --- src/diff/dijkstra.rs | 25 +++++++------------------ src/diff/graph.rs | 41 ++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 990623b05b..acacacc0cd 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -57,8 +57,8 @@ fn shortest_path<'a, 'b>( if distance_to_next < next.shortest_distance.get() { next.shortest_distance.set(distance_to_next); - next.last_edge.set(Some(edge)); - next.predecessor.set(Some(current)); + next.pred_edge.set(Some(edge)); + next.pred_vertex.set(Some(current)); heap.push(Reverse(distance_to_next), next); } } @@ -79,8 +79,8 @@ fn shortest_path<'a, 'b>( ); let mut vertex_route = successors( - end.last_edge.get().zip(end.predecessor.get()), - |&(_, node)| node.last_edge.get().zip(node.predecessor.get()), + end.pred_edge.get().zip(end.pred_vertex.get()), + |&(_, node)| node.pred_edge.get().zip(node.pred_vertex.get()), ) .collect::>(); vertex_route.reverse(); @@ -275,10 +275,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![EnterUnchangedDelimiter, NovelRhs, NovelRhs,] - ); + assert_eq!(actions, vec![EnterUnchangedDelimiter, NovelRhs, NovelRhs]); } #[test] @@ -317,12 +314,7 @@ mod tests { let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( actions, - vec![ - NovelRhs, - NovelLhs, - UnchangedNode, - UnchangedNode, - ], + vec![NovelRhs, NovelLhs, UnchangedNode, UnchangedNode], ); } @@ -410,10 +402,7 @@ mod tests { let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); - assert_eq!( - actions, - vec![NovelLhs, NovelLhs, NovelLhs,] - ); + assert_eq!(actions, vec![NovelLhs, NovelLhs, NovelLhs]); } #[test] diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 0a25c88cc9..80f687831f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -104,23 +104,23 @@ pub struct Vertex<'a, 'b> { // states pub is_visited: Cell, pub shortest_distance: Cell, - pub last_edge: Cell>, - pub predecessor: Cell>>, + pub pred_edge: Cell>, + pub pred_vertex: Cell>>, // core info pub lhs_syntax: SideSyntax<'a>, pub rhs_syntax: SideSyntax<'a>, - /// If we've entered the LHS and RHS together, we must pop both - /// sides together too. Otherwise we'd consider the following - /// case to have no changes. - /// - /// ```text - /// Old: (a b c) - /// New: (a b) c - /// ``` + // If we've entered the LHS and RHS together, we must pop both + // sides together too. Otherwise we'd consider the following + // case to have no changes. + // + // ```text + // Old: (a b c) + // New: (a b) c + // ``` pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, - /// If we've entered the LHS or RHS separately, we can pop either - /// side independently. + // If we've entered the LHS or RHS separately, we can pop either + // side independently. pop_lhs_cnt: u16, pop_rhs_cnt: u16, } @@ -138,8 +138,8 @@ impl<'a, 'b> Vertex<'a, 'b> { Vertex { is_visited: Cell::new(false), shortest_distance: Cell::new(u64::MAX), - last_edge: Cell::new(None), - predecessor: Cell::new(None), + pred_edge: Cell::new(None), + pred_vertex: Cell::new(None), lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), rhs_syntax: rhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), pop_both_ancestor: None, @@ -382,10 +382,10 @@ pub fn get_neighbours<'syn, 'b>( if let (Some(lhs_syntax), Some(rhs_syntax)) = v_info { if lhs_syntax == rhs_syntax { // Both nodes are equal, the happy case. - let depth_difference = (lhs_syntax.num_ancestors() as i32 - - rhs_syntax.num_ancestors() as i32) + let depth_difference = (lhs_syntax.num_ancestors() as i64 + - rhs_syntax.num_ancestors() as i64) .unsigned_abs(); - let cost = min(40, u64::from(depth_difference) + 1); + let cost = min(40, depth_difference + 1); add_neighbor( UnchangedNode, @@ -415,11 +415,10 @@ pub fn get_neighbours<'syn, 'b>( { // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { - // TODO: be consistent between parents_next and next_parents. - let depth_difference = (lhs_syntax.num_ancestors() as i32 - - rhs_syntax.num_ancestors() as i32) + let depth_difference = (lhs_syntax.num_ancestors() as i64 + - rhs_syntax.num_ancestors() as i64) .unsigned_abs(); - let cost = 100 + min(40, u64::from(depth_difference)); + let cost = 100 + min(40, depth_difference); add_neighbor( EnterUnchangedDelimiter, From 864fecf3e33ea14388e9964b6a36b7f16bfc1ed9 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 04:28:53 +0800 Subject: [PATCH 52/58] Optimize parent_stack_eq --- src/diff/graph.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 80f687831f..b8bbc482d7 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -151,8 +151,10 @@ impl<'a, 'b> Vertex<'a, 'b> { fn parent_stack_eq(&self, other: &Vertex<'a, 'b>) -> bool { // Vertices are pinned so ptrs are unique IDs get_ptr(self.pop_both_ancestor) == get_ptr(other.pop_both_ancestor) - && self.pop_lhs_cnt == other.pop_lhs_cnt - && self.pop_rhs_cnt == other.pop_rhs_cnt + // Vertices are equal => LHS/RHS syntaxes are equal + // Then if PopBoth ancestors are equal => PopLhs/Rhs counts are equal + // && self.pop_lhs_cnt == other.pop_lhs_cnt + // && self.pop_rhs_cnt == other.pop_rhs_cnt } fn can_pop_either(&self) -> bool { From 8845aab1e08f5160fa360190c1d2aa892e352d79 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 05:00:45 +0800 Subject: [PATCH 53/58] Optimize next_vertex --- src/diff/graph.rs | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index b8bbc482d7..7d29e32767 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -34,6 +34,10 @@ impl<'a> SideSyntax<'a> { self.data & 1 == 0 } + pub fn is_parent(&self) -> bool { + self.data & 1 == 1 + } + pub fn get_side(&self) -> Option<&'a Syntax<'a>> { if self.is_side() { Some(unsafe { &*(self.data as *const _) }) @@ -42,13 +46,14 @@ impl<'a> SideSyntax<'a> { } } - pub fn parent(&self) -> Option<&'a Syntax<'a>> { - match self.get_side() { - Some(side) => side.parent(), - None => match self.data ^ 1 { + pub fn get_parent(&self) -> Option<&'a Syntax<'a>> { + if self.is_parent() { + match self.data ^ 1 { 0 => None, d => Some(unsafe { &*(d as *const _) }), - }, + } + } else { + None } } @@ -127,8 +132,8 @@ pub struct Vertex<'a, 'b> { impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - !self.lhs_syntax.is_side() - && !self.rhs_syntax.is_side() + self.lhs_syntax.is_parent() + && self.rhs_syntax.is_parent() && self.pop_both_ancestor.is_none() && self.pop_lhs_cnt == 0 && self.pop_rhs_cnt == 0 @@ -293,25 +298,23 @@ fn next_vertex<'a, 'b>( mut pop_rhs_cnt: u16, ) -> Vertex<'a, 'b> { loop { - while !lhs_syntax.is_side() && pop_lhs_cnt > 0 { - lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); + while lhs_syntax.is_parent() && pop_lhs_cnt > 0 { + lhs_syntax = next_sibling(lhs_syntax.get_parent().unwrap()); pop_lhs_cnt -= 1; } - while !rhs_syntax.is_side() && pop_rhs_cnt > 0 { - rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); + while rhs_syntax.is_parent() && pop_rhs_cnt > 0 { + rhs_syntax = next_sibling(rhs_syntax.get_parent().unwrap()); pop_rhs_cnt -= 1; } - if let (false, false, Some(ancestor), 0, 0) = ( - lhs_syntax.is_side(), - rhs_syntax.is_side(), + if let (true, true, Some(ancestor)) = ( + lhs_syntax.is_parent(), + rhs_syntax.is_parent(), pop_both_ancestor, - pop_lhs_cnt, - pop_rhs_cnt, ) { - lhs_syntax = next_sibling(lhs_syntax.parent().unwrap()); - rhs_syntax = next_sibling(rhs_syntax.parent().unwrap()); + lhs_syntax = next_sibling(lhs_syntax.get_parent().unwrap()); + rhs_syntax = next_sibling(rhs_syntax.get_parent().unwrap()); pop_both_ancestor = ancestor.pop_both_ancestor; pop_lhs_cnt = ancestor.pop_lhs_cnt; pop_rhs_cnt = ancestor.pop_rhs_cnt; From 63c2830363687d29a94269b8a3219c0e1fd15f34 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 05:07:49 +0800 Subject: [PATCH 54/58] Make SideSyntax conform to strict provenance --- src/diff/graph.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7d29e32767..84046d9246 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -15,32 +15,33 @@ use crate::{ parse::syntax::{AtomKind, Syntax}, }; -/// Compress two `&Syntax` into a usize. +/// Compress two `&Syntax` into a pointer. /// /// Utilize the LSB as flag since `&Syntax` is aligned. /// /// ```text /// LSB = 0 -> side syntax -/// LSB = 1 -> optional parent syntax (side syntax is None) +/// LSB = 1 -> optional parent syntax /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SideSyntax<'a> { - data: usize, + data: *const u8, phantom: PhantomData<&'a Syntax<'a>>, } impl<'a> SideSyntax<'a> { pub fn is_side(&self) -> bool { - self.data & 1 == 0 + self.data as usize & 1 == 0 } pub fn is_parent(&self) -> bool { - self.data & 1 == 1 + self.data as usize & 1 == 1 } pub fn get_side(&self) -> Option<&'a Syntax<'a>> { if self.is_side() { - Some(unsafe { &*(self.data as *const _) }) + let data = self.data; + unsafe { std::mem::transmute(data) } } else { None } @@ -48,10 +49,8 @@ impl<'a> SideSyntax<'a> { pub fn get_parent(&self) -> Option<&'a Syntax<'a>> { if self.is_parent() { - match self.data ^ 1 { - 0 => None, - d => Some(unsafe { &*(d as *const _) }), - } + let data = self.data.wrapping_sub(1); + unsafe { std::mem::transmute(data) } } else { None } @@ -66,7 +65,7 @@ impl<'a> SideSyntax<'a> { pub fn from_parent(parent: Option<&'a Syntax<'a>>) -> Self { Self { - data: parent.map_or(0, |s| s as *const _ as usize) | 1, + data: (get_ptr(parent) as *const u8).wrapping_add(1), phantom: PhantomData, } } From eb16d57d3b024150478cf21b950e4462cf67023c Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 05:13:04 +0800 Subject: [PATCH 55/58] Further squeeze Vertex --- src/diff/graph.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 84046d9246..c93c182b0f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -107,7 +107,7 @@ fn get_ptr(opt: Option<&T>) -> *const T { pub struct Vertex<'a, 'b> { // states pub is_visited: Cell, - pub shortest_distance: Cell, + pub shortest_distance: Cell, pub pred_edge: Cell>, pub pred_vertex: Cell>>, @@ -125,8 +125,8 @@ pub struct Vertex<'a, 'b> { pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, // If we've entered the LHS or RHS separately, we can pop either // side independently. - pop_lhs_cnt: u16, - pop_rhs_cnt: u16, + pop_lhs_cnt: u8, + pop_rhs_cnt: u8, } impl<'a, 'b> Vertex<'a, 'b> { @@ -141,7 +141,7 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { Vertex { is_visited: Cell::new(false), - shortest_distance: Cell::new(u64::MAX), + shortest_distance: Cell::new(u32::MAX), pred_edge: Cell::new(None), pred_vertex: Cell::new(None), lhs_syntax: lhs_syntax.map_or(SideSyntax::from_parent(None), SideSyntax::from_side), @@ -234,7 +234,7 @@ pub enum Edge { use Edge::*; -const NOT_CONTIGUOUS_PENALTY: u64 = 50; +const NOT_CONTIGUOUS_PENALTY: u32 = 50; pub type SeenMap<'syn, 'b> = hashbrown::HashMap< &'b Vertex<'syn, 'b>, @@ -293,8 +293,8 @@ fn next_vertex<'a, 'b>( mut lhs_syntax: SideSyntax<'a>, mut rhs_syntax: SideSyntax<'a>, mut pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, - mut pop_lhs_cnt: u16, - mut pop_rhs_cnt: u16, + mut pop_lhs_cnt: u8, + mut pop_rhs_cnt: u8, ) -> Vertex<'a, 'b> { loop { while lhs_syntax.is_parent() && pop_lhs_cnt > 0 { @@ -333,7 +333,7 @@ fn next_vertex<'a, 'b>( } #[inline(always)] -fn next_novel<'a>(syntax: &'a Syntax<'a>, pop_cnt: u16) -> (u64, SideSyntax<'a>, u16) { +fn next_novel<'a>(syntax: &'a Syntax<'a>, pop_cnt: u8) -> (u32, SideSyntax<'a>, u8) { let mut cost = 300; // TODO: should this apply if prev is a parent // node rather than a sibling? @@ -369,7 +369,7 @@ pub fn get_neighbours<'syn, 'b>( v: &'b Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, - neighbors: &mut Vec<(Edge, u64, &'b Vertex<'syn, 'b>)>, + neighbors: &mut Vec<(Edge, u32, &'b Vertex<'syn, 'b>)>, ) { let mut add_neighbor = std::convert::identity( #[inline(always)] @@ -386,8 +386,8 @@ pub fn get_neighbours<'syn, 'b>( if let (Some(lhs_syntax), Some(rhs_syntax)) = v_info { if lhs_syntax == rhs_syntax { // Both nodes are equal, the happy case. - let depth_difference = (lhs_syntax.num_ancestors() as i64 - - rhs_syntax.num_ancestors() as i64) + let depth_difference = (lhs_syntax.num_ancestors() as i32 + - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); let cost = min(40, depth_difference + 1); @@ -419,8 +419,8 @@ pub fn get_neighbours<'syn, 'b>( { // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { - let depth_difference = (lhs_syntax.num_ancestors() as i64 - - rhs_syntax.num_ancestors() as i64) + let depth_difference = (lhs_syntax.num_ancestors() as i32 + - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); let cost = 100 + min(40, depth_difference); @@ -451,8 +451,8 @@ pub fn get_neighbours<'syn, 'b>( { // Both sides are comments and their content is reasonably similar. let levenshtein_pct = - (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - let cost = 150 + u64::from(100 - levenshtein_pct); + (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u32; + let cost = 150 + (100 - levenshtein_pct); add_neighbor( if levenshtein_pct > 40 { From 88d5cee97af48dc51a7630f537709cac186b2fcd Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 05:50:28 +0800 Subject: [PATCH 56/58] Fine tune next_sibling & next_child --- src/diff/graph.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index c93c182b0f..b67b8a80fe 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -277,15 +277,18 @@ fn looks_like_punctuation(content: &str) -> bool { #[inline(always)] fn next_sibling<'a>(syntax: &'a Syntax<'a>) -> SideSyntax<'a> { - let parent = SideSyntax::from_parent(syntax.parent()); - syntax.next_sibling().map_or(parent, SideSyntax::from_side) + match syntax.next_sibling() { + Some(sibling) => SideSyntax::from_side(sibling), + None => SideSyntax::from_parent(syntax.parent()), + } } #[inline(always)] fn next_child<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SideSyntax<'a> { - let parent = SideSyntax::from_parent(Some(syntax)); - let child = children.get(0).copied(); - child.map_or(parent, SideSyntax::from_side) + match children.get(0).copied() { + Some(child) => SideSyntax::from_side(child), + None => SideSyntax::from_parent(Some(syntax)), + } } #[inline(always)] From 5167d303773bd4730d70c0985c0460b61bdf4584 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 06:01:33 +0800 Subject: [PATCH 57/58] Simplify next_vertex --- src/diff/graph.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index b67b8a80fe..1e545673b0 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -300,23 +300,23 @@ fn next_vertex<'a, 'b>( mut pop_rhs_cnt: u8, ) -> Vertex<'a, 'b> { loop { - while lhs_syntax.is_parent() && pop_lhs_cnt > 0 { - lhs_syntax = next_sibling(lhs_syntax.get_parent().unwrap()); + while let (Some(lhs_parent), 1..) = (lhs_syntax.get_parent(), pop_lhs_cnt) { + lhs_syntax = next_sibling(lhs_parent); pop_lhs_cnt -= 1; } - while rhs_syntax.is_parent() && pop_rhs_cnt > 0 { - rhs_syntax = next_sibling(rhs_syntax.get_parent().unwrap()); + while let (Some(rhs_parent), 1..) = (rhs_syntax.get_parent(), pop_rhs_cnt) { + rhs_syntax = next_sibling(rhs_parent); pop_rhs_cnt -= 1; } - if let (true, true, Some(ancestor)) = ( - lhs_syntax.is_parent(), - rhs_syntax.is_parent(), + if let (Some(lhs_parent), Some(rhs_parent), Some(ancestor)) = ( + lhs_syntax.get_parent(), + rhs_syntax.get_parent(), pop_both_ancestor, ) { - lhs_syntax = next_sibling(lhs_syntax.get_parent().unwrap()); - rhs_syntax = next_sibling(rhs_syntax.get_parent().unwrap()); + lhs_syntax = next_sibling(lhs_parent); + rhs_syntax = next_sibling(rhs_parent); pop_both_ancestor = ancestor.pop_both_ancestor; pop_lhs_cnt = ancestor.pop_lhs_cnt; pop_rhs_cnt = ancestor.pop_rhs_cnt; From 80eae29dc00d07d901c21cdecb97c67907302150 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Tue, 22 Nov 2022 09:08:20 +0800 Subject: [PATCH 58/58] Fix comment --- src/diff/graph.rs | 60 +++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 1e545673b0..625f9f8f19 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -114,17 +114,37 @@ pub struct Vertex<'a, 'b> { // core info pub lhs_syntax: SideSyntax<'a>, pub rhs_syntax: SideSyntax<'a>, - // If we've entered the LHS and RHS together, we must pop both - // sides together too. Otherwise we'd consider the following - // case to have no changes. + // If we've entered the LHS delimiter and RHS delimiter together, + // we must pop both sides together too. Otherwise we'd consider + // the following case to have no changes. // // ```text // Old: (a b c) // New: (a b) c // ``` + // + // If we've entered the LHS delimiter or RHS delimiter separately, + // we can pop either side independently. + // + // So each entered delimiter belongs to one of two types, either + // PopBoth or PopEither. They form two stacks that are represented + // in the following structure. + // + // ```text + // | 3, 2, None + // | |PopEither| |PopEither| <--+ + // | |PopEither| |PopEither| | + // | |PopEither| | + // | | + // | 0, 0, Some ---------+ + // | | PopBoth | | PopBoth | <--+ + // | | + // | 1, 2, Some ---------+ + // | | PopBoth | | PopBoth | + // V |PopEither| |PopEither| + // Top |PopEither| + // ``` pop_both_ancestor: Option<&'b Vertex<'a, 'b>>, - // If we've entered the LHS or RHS separately, we can pop either - // side independently. pop_lhs_cnt: u8, pop_rhs_cnt: u8, } @@ -152,11 +172,12 @@ impl<'a, 'b> Vertex<'a, 'b> { } } - fn parent_stack_eq(&self, other: &Vertex<'a, 'b>) -> bool { - // Vertices are pinned so ptrs are unique IDs + fn delim_stack_eq(&self, other: &Vertex<'a, 'b>) -> bool { + // Vertices are pinned so addresses can be used as unique IDs. get_ptr(self.pop_both_ancestor) == get_ptr(other.pop_both_ancestor) - // Vertices are equal => LHS/RHS syntaxes are equal - // Then if PopBoth ancestors are equal => PopLhs/Rhs counts are equal + // We've already checked that LHS/RHS syntaxes are equal. + // Then if their PopBoth ancestors are equal, PopLhs/Rhs + // counts must be equal. // && self.pop_lhs_cnt == other.pop_lhs_cnt // && self.pop_rhs_cnt == other.pop_rhs_cnt } @@ -169,7 +190,7 @@ impl<'a, 'b> Vertex<'a, 'b> { impl PartialEq for Vertex<'_, '_> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole - // EnteredDelimiter stack, not just the immediate + // entered delimiter stack, not just the immediate // parents. By taking the immediate parent, we have // vertices with different stacks that are 'equal'. // @@ -184,15 +205,13 @@ impl PartialEq for Vertex<'_, '_> { // Handling this properly would require considering many // more vertices to be distinct, exponentially increasing // the graph size relative to tree depth. - - // We do want to distinguish whether we can pop each side - // independently though. Without this, if we find a case - // where we can pop sides together, we don't consider the - // case where we get a better diff by popping each side - // separately. - self.lhs_syntax == other.lhs_syntax && self.rhs_syntax == other.rhs_syntax + // We do want to distinguish whether we can pop each side + // independently though. Without this, if we find a case + // where we can pop sides together, we don't consider the + // case where we get a better diff by popping each side + // separately. && self.can_pop_either() == other.can_pop_either() } } @@ -213,13 +232,14 @@ impl Default for Vertex<'_, '_> { } } -/// An edge in our graph, with an associated [`cost`](Edge::cost). +/// An edge in our graph. /// /// A syntax node can always be marked as novel, so a vertex will have /// at least a NovelFoo edge. Depending on the syntax nodes of the /// current [`Vertex`], other edges may also be available. /// -/// See [`neighbours`] for all the edges available for a given `Vertex`. +/// See [`get_neighbours`] for all the edges available for a given +/// `Vertex`. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum Edge { UnchangedNode, @@ -252,7 +272,7 @@ fn allocate_if_new<'syn, 'b>( if let Some(existing) = value { return existing; } - if key.parent_stack_eq(&v) { + if key.delim_stack_eq(&v) { return key; } let allocated = alloc.alloc(v);