From 28d86733712d60e620162e2a7ac3ee433e351153 Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sat, 11 May 2024 16:09:25 -0700 Subject: [PATCH] Treat tree-sitter nodes highlighted as comments as atoms Fixes #711 --- CHANGELOG.md | 7 +++++++ sample_files/compare.expected | 2 +- sample_files/elm_1.elm | 2 ++ sample_files/elm_2.elm | 2 ++ src/parse/tree_sitter_parser.rs | 16 +++++++--------- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc8ef066a2..62885967b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ ## 0.59 (unreleased) +### Parsing + +Difftastic now uses tree-sitter comment highlighting as a hint that +nodes should be treated as atoms. This ensures comments are treated +more consistently across languages. This fixes cases in Elm where +comment differences were ignored, and may improve other languages too. + ## 0.58 (released 11th May 2024) ### Parsing diff --git a/sample_files/compare.expected b/sample_files/compare.expected index 6c59c357ea..e80f8076b9 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -56,7 +56,7 @@ sample_files/elisp_contiguous_1.el sample_files/elisp_contiguous_2.el beaf7d6c5136d3db7a36ff49a131b251 - sample_files/elm_1.elm sample_files/elm_2.elm -54d73b12de1571cfb9b27d3b2a7f4f62 - +33b71893107538cff574276f2837adbb - sample_files/elvish_1.elv sample_files/elvish_2.elv f80b47646e7dd2bd3a49393d00657465 - diff --git a/sample_files/elm_1.elm b/sample_files/elm_1.elm index 60d77a6df1..b8c855bc16 100644 --- a/sample_files/elm_1.elm +++ b/sample_files/elm_1.elm @@ -1,6 +1,8 @@ module Main exposing (blue, green, list, x, y, z) +{- bar + -} list : List Int list = [ 1, 2, 3 ] diff --git a/sample_files/elm_2.elm b/sample_files/elm_2.elm index fba7f65bb5..fdc04a117b 100644 --- a/sample_files/elm_2.elm +++ b/sample_files/elm_2.elm @@ -6,6 +6,8 @@ module Main exposing ) +{- foo + -} list : List Int list = [ 1 diff --git a/src/parse/tree_sitter_parser.rs 
b/src/parse/tree_sitter_parser.rs index 28e21125df..0a4b2829ba 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -507,7 +507,6 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { atom_nodes: vec![ "doctype", "quoted_attribute_value", - "comment", "raw_text", "tag_name", "text", @@ -956,10 +955,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { let language = unsafe { tree_sitter_scala() }; TreeSitterConfig { language, - // TODO: probably all comments should be treated as atoms - atom_nodes: vec!["string", "template_string", "comment", "block_comment"] - .into_iter() - .collect(), + atom_nodes: vec!["string", "template_string"].into_iter().collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")], highlight_query: ts::Query::new( language, @@ -973,9 +969,8 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { let language = unsafe { tree_sitter_scheme() }; TreeSitterConfig { language, - atom_nodes: vec!["block_comment", "comment", "string"] - .into_iter() - .collect(), + // + atom_nodes: vec!["string"].into_iter().collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")], highlight_query: ts::Query::new( language, @@ -1615,9 +1610,12 @@ fn syntax_from_cursor<'a>( *error_count += 1; } - if config.atom_nodes.contains(node.kind()) { + if config.atom_nodes.contains(node.kind()) || highlights.comment_ids.contains(&node.id()) { // Treat nodes like string literals as atoms, regardless // of whether they have children. + // + // Also, if this node is highlighted as a comment, treat it as + // an atom unconditionally. atom_from_cursor(arena, src, nl_pos, cursor, highlights, ignore_comments) } else if node.child_count() > 0 { Some(list_from_cursor(