diff --git a/harper-typst/src/lib.rs b/harper-typst/src/lib.rs index 28288be2..a1919273 100644 --- a/harper-typst/src/lib.rs +++ b/harper-typst/src/lib.rs @@ -7,8 +7,8 @@ use typst_translator::TypstTranslator; use harper_core::{parsers::Parser, Token}; use itertools::Itertools; use typst_syntax::{ - ast::{AstNode, Markup}, - Source, + ast::{AstNode, Expr, Markup}, + Source, SyntaxNode, }; /// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files. @@ -25,14 +25,61 @@ impl Parser for Typst { // Recurse through AST to create tokens let parse_helper = TypstTranslator::new(&typst_document); - typst_tree - .exprs() + let mut buf = Vec::new(); + let exprs = typst_tree.exprs().collect_vec(); + let exprs = convert_parbreaks(&mut buf, &exprs); + exprs + .into_iter() .filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document))) .flatten() .collect_vec() } } +/// Converts newlines after certain elements to paragraph breaks +/// This is accomplished here instead of in the translating module because at this point there is +/// still semantic information associated with the elements. 
+/// +/// Newlines are separate expressions in the parse tree (as the `Space` variant). +fn convert_parbreaks<'a>(buf: &'a mut Vec, exprs: &'a [Expr]) -> Vec> { + // Owned placeholder nodes of kind Parbreak, one per input expression, each synthesized with that expression's span + *buf = exprs + .iter() + .map(|e| { + let mut node = SyntaxNode::placeholder(typst_syntax::SyntaxKind::Parbreak); + node.synthesize(e.span()); + node + }) + .collect_vec(); + + let should_parbreak = |e1, e2, e3| { + matches!(e2, Expr::Space(_)) + && (matches!(e1, Expr::Heading(_) | Expr::List(_)) + || matches!(e3, Expr::Heading(_) | Expr::List(_))) + }; + + let mut res: Vec = Vec::new(); + let mut last_element: Option = None; + for ((i, expr), (_, next_expr)) in exprs.iter().enumerate().tuple_windows() { + let mut current_expr = expr.clone(); + if let Some(last_element) = last_element { + if should_parbreak(last_element, *expr, *next_expr) { + let pbreak = typst_syntax::ast::Parbreak::from_untyped(&buf[i]) + .expect("Unable to convert expression to Parbreak"); + current_expr = Expr::Parbreak(pbreak); + } + } + res.push(current_expr); + last_element = Some(expr.clone()) + } + // Push the last element unchanged, because tuple_windows() above never yields it as the first item of a pair + if let Some(last) = exprs.iter().last() { + res.push(last.clone()); + } + + res +} + #[cfg(test)] mod tests { use itertools::Itertools; @@ -201,7 +248,7 @@ mod tests { &token_kinds.as_slice(), &[ TokenKind::Word(_), - TokenKind::Newline(1), + TokenKind::ParagraphBreak, TokenKind::Word(_) ] )) @@ -228,10 +275,10 @@ mod tests { } #[test] - fn label_unlintable() { + fn label_ref_unlintable() { let source = "= Header