diff --git a/crates/genemichaels-lib/src/lib.rs b/crates/genemichaels-lib/src/lib.rs index 11d2abc..cb33ddb 100644 --- a/crates/genemichaels-lib/src/lib.rs +++ b/crates/genemichaels-lib/src/lib.rs @@ -503,7 +503,7 @@ pub fn format_str(source: &str, config: &FormatConfig) -> Result o + 1, None => source.len(), diff --git a/crates/genemichaels-lib/src/sg_general.rs b/crates/genemichaels-lib/src/sg_general.rs index 4e4e054..86d213a 100644 --- a/crates/genemichaels-lib/src/sg_general.rs +++ b/crates/genemichaels-lib/src/sg_general.rs @@ -321,6 +321,7 @@ pub(crate) fn append_macro_body( sg: &mut SplitGroupBuilder, tokens: TokenStream, ) { + // Try to parse entire macro like a function call if let Ok(exprs) = syn::parse2::(quote!{ f(#tokens) }) { @@ -331,6 +332,8 @@ pub(crate) fn append_macro_body( return; } } + + // Try to parse entire macro like a block if let Ok(block) = syn::parse2::(quote!{ { #tokens @@ -348,27 +351,17 @@ pub(crate) fn append_macro_body( } } - #[derive(PartialEq)] - enum ConsecMode { - // Start, joining punct (.) - ConnectForward, - // Idents, literals - NoConnect, - // Other punctuation - Punct, - } - - // Split token stream into "expressions" using `;` and `,` and then try to - // re-evaluate each expression to use normal formatting. + // Split token stream into "expressions" (/substream) using `;` and `,` and then + // try to format each expression. let mut substreams: Vec<(Vec, Option)> = vec![]; { let mut top = vec![]; for t in tokens { let (push, break_) = match &t { - proc_macro2::TokenTree::Punct(p) if matches!(p.as_char(), ';' | ',') => { + TokenTree::Punct(p) if matches!(p.as_char(), ';' | ',') => { (false, Some(Some(p.clone()))) }, - proc_macro2::TokenTree::Group(g) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => { + TokenTree::Group(g) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => { (true, Some(None)) }, _ => { @@ -403,6 +396,8 @@ pub(crate) fn append_macro_body( } let tokens = TokenStream::from_iter(sub.0); let punct = sub.1; + + // Try to parse current expression/substream as a function call if let Ok(exprs) = syn::parse2::(quote!{ f(#tokens #punct) }) { @@ -420,6 +415,8 @@ pub(crate) fn append_macro_body( break 'nextsub; } } + + // Try to parse current expression/substream as a block if let Ok(block) = syn::parse2::(quote!{ { #tokens #punct @@ -436,11 +433,37 @@ pub(crate) fn append_macro_body( break 'nextsub; } } + + // Freeform formatting { - let mut mode = ConsecMode::ConnectForward; + /// Identify punctuation that connects things tightly + fn is_pull_next_punct(p: &Punct) -> bool { + return match p.as_char() { + '.' => true, + '\'' => true, + '$' => true, + '#' => true, + _ => false, + }; + } + + // With exceptions, the default heterogenous adjacent token tree behavior is to + // push. For punctuation-adjacent, it depends on the punctuation type. + fn is_hetero_push_next(prev: &Option) -> bool { + return match &prev { + Some(prev) => match prev { + TokenTree::Group(_) => true, + TokenTree::Ident(_) | TokenTree::Literal(_) => true, + TokenTree::Punct(punct) => !is_pull_next_punct(&punct), + }, + None => false, + }; + } + + let mut previous: Option = None; for t in tokens { - match t { - proc_macro2::TokenTree::Group(g) => { + match &t { + TokenTree::Group(g) => { append_whitespace(out, base_indent, sg, g.span_open().start()); sg.child({ let mut sg = new_sg(out); @@ -454,11 +477,8 @@ pub(crate) fn append_macro_body( }), g.stream()); }, proc_macro2::Delimiter::Brace => { - match mode { - ConsecMode::ConnectForward => { }, - _ => { - sg.seg(out, " "); - }, + if is_hetero_push_next(&previous) { + sg.seg(out, " "); } append_macro_body_bracketed(out, &indent, &mut sg, &MacroDelimiter::Brace({ let mut delim = Brace::default(); @@ -480,66 +500,42 @@ pub(crate) fn append_macro_body( } sg.build(out) }); - mode = ConsecMode::NoConnect; }, - proc_macro2::TokenTree::Ident(i) => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); - }, + TokenTree::Ident(i) => { + if is_hetero_push_next(&previous) { + sg.seg(out, " "); } append_whitespace(out, base_indent, sg, i.span().start()); sg.seg(out, &i.to_string()); - mode = ConsecMode::NoConnect; }, - proc_macro2::TokenTree::Punct(p) => match p.as_char() { - '\'' | '$' | '#' => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); - }, - } - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::ConnectForward; - }, - ':' => { - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::Punct; - }, - '.' => { - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::ConnectForward; - }, - _ => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect => { - sg.seg(out, " "); + TokenTree::Punct(p) => { + if match &previous { + Some(previous) => match previous { + TokenTree::Group(_) | + TokenTree::Ident(_) | + TokenTree::Literal(_) => match p.as_char() { + ':' => false, + '.' => false, + _ => true, }, - ConsecMode::Punct => { }, - } - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::Punct; - }, - }, - proc_macro2::TokenTree::Literal(l) => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); + TokenTree::Punct(prev_p) => prev_p.span().end() != p.span().start(), }, + None => false, + } { + sg.seg(out, " "); + } + append_whitespace(out, base_indent, sg, p.span().start()); + sg.seg(out, &p.to_string()); + }, + TokenTree::Literal(l) => { + if is_hetero_push_next(&previous) { + sg.seg(out, " "); } append_whitespace(out, base_indent, sg, l.span().start()); sg.seg(out, &l.to_string()); - mode = ConsecMode::NoConnect; }, } + previous = Some(t); } if let Some(suf) = punct { append_whitespace(out, base_indent, sg, suf.span().start()); diff --git a/crates/genemichaels-lib/src/whitespace.rs b/crates/genemichaels-lib/src/whitespace.rs index e766bed..dbbafdb 100644 --- a/crates/genemichaels-lib/src/whitespace.rs +++ b/crates/genemichaels-lib/src/whitespace.rs @@ -39,6 +39,9 @@ fn unicode_len(text: &str) -> VisualLen { VisualLen(text.chars().count()) } +/// Identifies the start/stop locations of whitespace in a chunk of source. +/// Whitespace is grouped runs, but the `keep_max_blank_lines` parameter allows +/// splitting the groups. pub fn extract_whitespaces( keep_max_blank_lines: usize, source: &str, @@ -374,7 +377,12 @@ pub fn extract_whitespaces( ).map_err( |e| loga::err_with( "Error undoing syn parse transformations", - ea!(line = e.span().start().line, column = e.span().start().column, error = e.to_string()), + ea!( + line = e.span().start().line, + column = e.span().start().column, + error = e.to_string(), + source = source.lines().skip(e.span().start().line - 1).next().unwrap() + ), ), )?, ); diff --git a/crates/genemichaels-lib/tests/roundtrip.rs b/crates/genemichaels-lib/tests/roundtrip.rs index faeec93..9de4f35 100644 --- a/crates/genemichaels-lib/tests/roundtrip.rs +++ b/crates/genemichaels-lib/tests/roundtrip.rs @@ -1,5 +1,4 @@ #![cfg(test)] - use genemichaels_lib::{ format_str, FormatConfig, @@ -137,6 +136,12 @@ fn rt_macro_star_equal() { "#); } +#[test] +fn rt_macro_star_equal_gt() { + rt(r#"x!(a * => b); +"#); +} + #[test] fn rt_comments_end() { rt(r#"const X: i32 = 7; @@ -420,7 +425,22 @@ fn rt_self_type() { #[test] fn rt_skip_shebang() { - rt(r#"#!#[cfg(test)] + rt(r#"#!/bin/bash fn main() { } "#); } + +#[test] +fn rt_dontskip_modattrs() { + rt( + r#"#![allow( + clippy::too_many_arguments, + clippy::field_reassign_with_default, + clippy::never_loop, + clippy::derive_hash_xor_eq +)] + +fn main() { } +"#, + ); +}