From 4b0489e2f3c1136b206e93915ebedcc207d70969 Mon Sep 17 00:00:00 2001 From: Ika Date: Sun, 1 Sep 2019 14:30:33 +0800 Subject: [PATCH 001/282] fix: allow lowercase unicode escape (#440) --- cli/src/generate/prepare_grammar/expand_tokens.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 2b88762bd8..9b594f3caa 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -12,7 +12,7 @@ use std::i32; lazy_static! { static ref CURLY_BRACE_REGEX: Regex = - Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-F,}][^}]*)\}"#).unwrap(); + Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap(); } const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; @@ -653,12 +653,15 @@ mod tests { Rule::pattern(r#"\{[ab]{3}\}"#), // Unicode codepoints Rule::pattern(r#"\u{1000A}"#), + // Unicode codepoints (lowercase) + Rule::pattern(r#"\u{1000b}"#), ], separators: vec![], examples: vec![ ("u{1234} ok", Some((0, "u{1234}"))), ("{aba}}", Some((1, "{aba}"))), ("\u{1000A}", Some((2, "\u{1000A}"))), + ("\u{1000b}", Some((3, "\u{1000b}"))), ], }, ]; From 1b033fdfa45faf14672f5f8895aef85eb136da75 Mon Sep 17 00:00:00 2001 From: Ika Date: Sun, 1 Sep 2019 23:52:39 +0800 Subject: [PATCH 002/282] feat(cli): support snapshot testing with `--update` flag This PR adds an `--update` flag to the `tree-sitter test` command, which adds the ability to replace the _expected_ output in the corpus.txt with the _actual_ output produced by the parser, that is, we can now simply use this `--update` flag to write all the corresponding parser output back to the corpus.txt, and we just need to check the output without typing its actual sexp. - use the same output format as `tree-sitter parse`, except there won't be any position information printed. 
- the corpus.txt won't be touched if there's no difference between the _expected_ output and the _actual_ output in that file. - if there're differences between _expected_ and _actual_, only the test case that is different will be replaced, the rest test cases will stay as-is. (All the delimiters `===`/`---` will be normalized as 80-column long, though.) - this flag also works with `--filter` flag. --- cli/src/lib.rs | 1 + cli/src/main.rs | 6 +- cli/src/parse.rs | 53 +------- cli/src/print.rs | 60 +++++++++ cli/src/test.rs | 251 ++++++++++++++++++++++++++++------- cli/src/tests/corpus_test.rs | 8 +- 6 files changed, 274 insertions(+), 105 deletions(-) create mode 100644 cli/src/print.rs diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 33a9904faa..e996083af7 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -9,6 +9,7 @@ pub mod test; pub mod util; pub mod wasm; pub mod web_ui; +pub mod print; #[cfg(test)] mod tests; diff --git a/cli/src/main.rs b/cli/src/main.rs index 59d04a97cf..7a5adbf19f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -83,7 +83,8 @@ fn run() -> error::Result<()> { .takes_value(true), ) .arg(Arg::with_name("debug").long("debug").short("d")) - .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), + .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) + .arg(Arg::with_name("update").long("update").short("u")), ) .subcommand( SubCommand::with_name("highlight") @@ -150,9 +151,10 @@ fn run() -> error::Result<()> { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let filter = matches.value_of("filter"); + let update = matches.is_present("update"); let corpus_path = current_dir.join("corpus"); if let Some(language) = loader.languages_at_path(¤t_dir)?.first() { - test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter)?; + test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?; } else { eprintln!("No language found"); } 
diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d1ddb49924..065ffb026a 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,4 +1,5 @@ use super::error::{Error, Result}; +use super::print::print_tree; use super::util; use std::io::{self, Write}; use std::path::Path; @@ -81,57 +82,7 @@ pub fn parse_file_at_path( let mut cursor = tree.walk(); if !quiet { - let mut needs_newline = false; - let mut indent_level = 0; - let mut did_visit_children = false; - loop { - let node = cursor.node(); - let is_named = node.is_named(); - if did_visit_children { - if is_named { - stdout.write(b")")?; - needs_newline = true; - } - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; - } else { - break; - } - } else { - if is_named { - if needs_newline { - stdout.write(b"\n")?; - } - for _ in 0..indent_level { - stdout.write(b" ")?; - } - let start = node.start_position(); - let end = node.end_position(); - if let Some(field_name) = cursor.field_name() { - write!(&mut stdout, "{}: ", field_name)?; - } - write!( - &mut stdout, - "({} [{}, {}] - [{}, {}]", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - needs_newline = true; - } - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; - } else { - did_visit_children = true; - } - } - } + print_tree(&mut stdout, &mut cursor, true)?; cursor.reset(tree.root_node()); println!(""); } diff --git a/cli/src/print.rs b/cli/src/print.rs new file mode 100644 index 0000000000..7a57c625fa --- /dev/null +++ b/cli/src/print.rs @@ -0,0 +1,60 @@ +use super::error::{Result}; +use std::io::{Write}; +use tree_sitter::{TreeCursor}; + +pub fn print_tree(output: &mut Write, cursor: &mut TreeCursor, prints_position: bool) -> Result<()> { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = 
node.is_named(); + if did_visit_children { + if is_named { + output.write(b")")?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + output.write(b"\n")?; + } + for _ in 0..indent_level { + output.write(b" ")?; + } + if let Some(field_name) = cursor.field_name() { + write!(output, "{}: ", field_name)?; + } + write!(output, "({}", node.kind())?; + if prints_position { + let start = node.start_position(); + let end = node.end_position(); + write!( + output, + " [{}, {}] - [{}, {}]", + start.row, + start.column, + end.row, + end.column + )?; + } + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + return Ok(()); +} diff --git a/cli/src/test.rs b/cli/src/test.rs index f742e887f2..7a08e80595 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,4 +1,5 @@ use super::error::{Error, Result}; +use super::print::print_tree; use super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; @@ -8,7 +9,7 @@ use regex::Regex; use std::char; use std::fs; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str; use tree_sitter::{Language, LogType, Parser}; @@ -30,6 +31,7 @@ pub enum TestEntry { Group { name: String, children: Vec, + file_path: Option, }, Example { name: String, @@ -44,6 +46,7 @@ impl Default for TestEntry { TestEntry::Group { name: String::new(), children: Vec::new(), + file_path: None, } } } @@ -54,43 +57,52 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, + update: bool, ) -> Result<()> { - let test_entry = parse_tests(path)?; + let test_entry = parse_tests(path, false)?; let mut _log_session = None; let mut parser = Parser::new(); 
parser.set_language(language).map_err(|e| e.to_string())?; - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); - } else if debug { - parser.set_logger(Some(Box::new(|log_type, message| { - if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); - } - write!(&mut io::stderr(), "{}\n", message).unwrap(); - }))); - } - - let mut failures = Vec::new(); - if let TestEntry::Group { children, .. } = test_entry { - for child in children { - run_tests(&mut parser, child, filter, 0, &mut failures)?; + if !update { + if debug_graph { + _log_session = Some(util::log_graphs(&mut parser, "log.html")?); + } else if debug { + parser.set_logger(Some(Box::new(|log_type, message| { + if log_type == LogType::Lex { + io::stderr().write(b" ").unwrap(); + } + write!(&mut io::stderr(), "{}\n", message).unwrap(); + }))); } } - if failures.len() > 0 { + let mut diffs = Vec::new(); + let mut update_entries = Vec::new(); + run_tests(&mut parser, test_entry, filter, update, &mut update_entries, -1, &mut diffs)?; + + if diffs.len() > 0 { println!(""); - if failures.len() == 1 { - println!("1 failure:") + let diff_name = if update { "update" } else { "failure" }; + if diffs.len() == 1 { + println!("1 {}:", diff_name) } else { - println!("{} failures:", failures.len()) + println!("{} {}s:", diffs.len(), diff_name) } - print_diff_key(); - for (i, (name, actual, expected)) in failures.iter().enumerate() { + if update { + print_update_diff_key(); + } else { + print_diff_key(); + } + for (i, (name, parsed, provided)) in diffs.iter().enumerate() { println!("\n {}. 
{}:", i + 1, name); - print_diff(actual, expected); + if update { + print_update_diff(provided, parsed); + } else { + print_diff(parsed, provided); + } } Error::err(String::new()) } else { @@ -99,14 +111,40 @@ pub fn run_tests_at_path( } pub fn print_diff_key() { + print_diff_key_with_colors("actual", "expected", Colour::Red, Colour::Green); +} + +fn print_update_diff_key() { + print_diff_key_with_colors("original", "updated", Colour::Yellow, Colour::Green); +} + +fn print_diff_key_with_colors( + actual_name: &str, + expected_name: &str, + actual_color: Colour, + expected_color: Colour, +) { println!( "\n{} / {}", - Colour::Green.paint("expected"), - Colour::Red.paint("actual") + expected_color.paint(expected_name), + actual_color.paint(actual_name) ); } pub fn print_diff(actual: &String, expected: &String) { + print_diff_with_colors(actual, expected, Colour::Red, Colour::Green); +} + +fn print_update_diff(actual: &String, expected: &String) { + print_diff_with_colors(actual, expected, Colour::Yellow, Colour::Green); +} + +fn print_diff_with_colors( + actual: &String, + expected: &String, + actual_color: Colour, + expected_color: Colour, +) { let changeset = Changeset::new(actual, expected, " "); print!(" "); for diff in &changeset.diffs { @@ -115,10 +153,10 @@ pub fn print_diff(actual: &String, expected: &String) { print!("{}{}", part, changeset.split); } Difference::Add(part) => { - print!("{}{}", Colour::Green.paint(part), changeset.split); + print!("{}{}", expected_color.paint(part), changeset.split); } Difference::Rem(part) => { - print!("{}{}", Colour::Red.paint(part), changeset.split); + print!("{}{}", actual_color.paint(part), changeset.split); } } } @@ -129,8 +167,10 @@ fn run_tests( parser: &mut Parser, test_entry: TestEntry, filter: Option<&str>, + update: bool, + update_entries: &mut Vec<(String, String, String)>, mut indent_level: i32, - failures: &mut Vec<(String, String, String)>, + diffs: &mut Vec<(String, String, String)>, ) -> Result<()> { match 
test_entry { TestEntry::Example { @@ -141,39 +181,97 @@ fn run_tests( } => { if let Some(filter) = filter { if !name.contains(filter) { + if update { + let input = String::from_utf8(input).unwrap(); + update_entries.push((name, input, output)); + } return Ok(()); } } let tree = parser.parse(&input, None).unwrap(); - let mut actual = tree.root_node().to_sexp(); + let mut parsed = tree.root_node().to_sexp(); if !has_fields { - actual = strip_sexp_fields(actual); + parsed = strip_sexp_fields(parsed); } for _ in 0..indent_level { print!(" "); } - if actual == output { + let provided = normalize_sexp(&output); + if parsed == provided { println!("✓ {}", Colour::Green.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + update_entries.push((name, input, output)); + } } else { - println!("✗ {}", Colour::Red.paint(&name)); - failures.push((name, actual, output)); + if update { + let input = String::from_utf8(input).unwrap(); + let mut fixed_output = Vec::new(); + let mut cursor = tree.walk(); + print_tree(&mut fixed_output, &mut cursor, false)?; + let fixed_output = String::from_utf8(fixed_output).unwrap(); + update_entries.push((name.clone(), input, fixed_output)); + println!("✓ {}", Colour::Yellow.paint(&name)); + } else { + println!("✗ {}", Colour::Red.paint(&name)); + } + diffs.push((name, parsed, provided)); } } - TestEntry::Group { name, children } => { - for _ in 0..indent_level { - print!(" "); + TestEntry::Group { name, children, file_path } => { + if indent_level >= 0 { + for _ in 0..indent_level { + print!(" "); + } + println!("{}:", name); } - println!("{}:", name); + + let diff_count = diffs.len(); + indent_level += 1; for child in children { - run_tests(parser, child, filter, indent_level, failures)?; + run_tests(parser, child, filter, update, update_entries, indent_level, diffs)?; } + + if let Some(file_path) = file_path { + if update && diffs.len() - diff_count > 0 { + write_tests(&file_path, &update_entries)?; + } + 
update_entries.clear(); + } + } + } + Ok(()) +} + +fn write_tests(file_path: &Path, update_entries: &Vec<(String, String, String)>) -> Result<()> { + let mut buffer = fs::File::create(file_path)?; + write_tests_to_buffer(&mut buffer, update_entries) +} + +fn write_tests_to_buffer( + buffer: &mut Write, + update_entries: &Vec<(String, String, String)>, +) -> Result<()> { + for (i, (name, input, output)) in update_entries.iter().enumerate() { + if i > 0 { + write!(buffer, "\n")?; } + write!( + buffer, + "{}\n{}\n{}\n{}\n{}\n\n{}\n", + "=".repeat(80), + name, + "=".repeat(80), + input, + "-".repeat(80), + output.trim() + )?; } Ok(()) } -pub fn parse_tests(path: &Path) -> io::Result { +pub fn parse_tests(path: &Path, norm_sexp: bool) -> io::Result { let name = path .file_stem() .and_then(|s| s.to_str()) @@ -189,13 +287,13 @@ pub fn parse_tests(path: &Path) -> io::Result { .unwrap_or("") .starts_with("."); if !hidden { - children.push(parse_tests(&entry.path())?); + children.push(parse_tests(&entry.path(), norm_sexp)?); } } - Ok(TestEntry::Group { name, children }) + Ok(TestEntry::Group { name, children, file_path: None }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content)) + Ok(parse_test_content(name, content, Some(path.to_path_buf()), norm_sexp)) } } @@ -203,7 +301,12 @@ pub fn strip_sexp_fields(sexp: String) -> String { SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() } -fn parse_test_content(name: String, content: String) -> TestEntry { +fn parse_test_content( + name: String, + content: String, + file_path: Option, + norm_sexp: bool, +) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut previous_name = String::new(); @@ -224,8 +327,11 @@ fn parse_test_content(name: String, content: String) -> TestEntry { ); if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) { let input = bytes[previous_header_end..divider_start].to_vec(); - let output = 
WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); - let output = output.replace(" )", ")"); + let output = if norm_sexp { + normalize_sexp(output) + } else { + output.to_owned() + }; let has_fields = SEXP_FIELD_REGEX.is_match(&output); children.push(TestEntry::Example { name: previous_name, @@ -241,7 +347,13 @@ fn parse_test_content(name: String, content: String) -> TestEntry { .to_string(); previous_header_end = header_end; } - TestEntry::Group { name, children } + TestEntry::Group { name, children, file_path } +} + +fn normalize_sexp(sexp: &str) -> String { + let sexp = WHITESPACE_REGEX.replace_all(sexp.trim(), " ").to_string(); + let sexp = sexp.replace(" )", ")"); + return sexp; } #[cfg(test)] @@ -273,6 +385,8 @@ d "# .trim() .to_string(), + None, + true, ); assert_eq!( @@ -292,8 +406,49 @@ d output: "(d)".to_string(), has_fields: false, }, - ] + ], + file_path: None } ); } + + #[test] + fn test_write_tests_to_buffer() { + let mut buffer = Vec::new(); + let update_entries = vec![ + ( + "title 1".to_string(), + "input 1".to_string(), + "output 1".to_string(), + ), + ( + "title 2".to_string(), + "input 2".to_string(), + "output 2".to_string(), + ), + ]; + write_tests_to_buffer(&mut buffer, &update_entries).unwrap(); + assert_eq!( + String::from_utf8(buffer).unwrap(), + r#" +================================================================================ +title 1 +================================================================================ +input 1 +-------------------------------------------------------------------------------- + +output 1 + +================================================================================ +title 2 +================================================================================ +input 2 +-------------------------------------------------------------------------------- + +output 2 +"# + .trim_start() + .to_string() + ); + } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 
a8adce5f53..dae8754240 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -59,8 +59,8 @@ fn test_real_language_corpus_files() { let language = get_language(language_name); let corpus_dir = grammars_dir.join(language_name).join("corpus"); let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); - let main_tests = parse_tests(&corpus_dir).unwrap(); - let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); + let main_tests = parse_tests(&corpus_dir, true).unwrap(); + let error_tests = parse_tests(&error_corpus_file, true).unwrap_or(TestEntry::default()); let mut tests = flatten_tests(main_tests); tests.extend(flatten_tests(error_tests)); @@ -243,7 +243,7 @@ fn test_feature_corpus_files() { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, &c_code, Some(&test_path)); - let test = parse_tests(&corpus_path).unwrap(); + let test = parse_tests(&corpus_path, true).unwrap(); let tests = flatten_tests(test); if !tests.is_empty() { @@ -381,7 +381,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children } => { + TestEntry::Group { mut name, children, .. } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); From 807fdf3ef0bbefade37607e923ab3893a420d50b Mon Sep 17 00:00:00 2001 From: Ika Date: Fri, 6 Sep 2019 10:42:37 +0800 Subject: [PATCH 003/282] Revert "feat(cli): support snapshot testing with `--update` flag" This reverts commit 1b033fdfa45faf14672f5f8895aef85eb136da75. 
--- cli/src/lib.rs | 1 - cli/src/main.rs | 6 +- cli/src/parse.rs | 53 +++++++- cli/src/print.rs | 60 --------- cli/src/test.rs | 251 +++++++---------------------------- cli/src/tests/corpus_test.rs | 8 +- 6 files changed, 105 insertions(+), 274 deletions(-) delete mode 100644 cli/src/print.rs diff --git a/cli/src/lib.rs b/cli/src/lib.rs index e996083af7..33a9904faa 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -9,7 +9,6 @@ pub mod test; pub mod util; pub mod wasm; pub mod web_ui; -pub mod print; #[cfg(test)] mod tests; diff --git a/cli/src/main.rs b/cli/src/main.rs index 7a5adbf19f..59d04a97cf 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -83,8 +83,7 @@ fn run() -> error::Result<()> { .takes_value(true), ) .arg(Arg::with_name("debug").long("debug").short("d")) - .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) - .arg(Arg::with_name("update").long("update").short("u")), + .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) .subcommand( SubCommand::with_name("highlight") @@ -151,10 +150,9 @@ fn run() -> error::Result<()> { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let filter = matches.value_of("filter"); - let update = matches.is_present("update"); let corpus_path = current_dir.join("corpus"); if let Some(language) = loader.languages_at_path(¤t_dir)?.first() { - test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?; + test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter)?; } else { eprintln!("No language found"); } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 065ffb026a..d1ddb49924 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,5 +1,4 @@ use super::error::{Error, Result}; -use super::print::print_tree; use super::util; use std::io::{self, Write}; use std::path::Path; @@ -82,7 +81,57 @@ pub fn parse_file_at_path( let mut cursor = tree.walk(); if !quiet { - print_tree(&mut stdout, &mut cursor, 
true)?; + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + stdout.write(b")")?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + let start = node.start_position(); + let end = node.end_position(); + if let Some(field_name) = cursor.field_name() { + write!(&mut stdout, "{}: ", field_name)?; + } + write!( + &mut stdout, + "({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } cursor.reset(tree.root_node()); println!(""); } diff --git a/cli/src/print.rs b/cli/src/print.rs deleted file mode 100644 index 7a57c625fa..0000000000 --- a/cli/src/print.rs +++ /dev/null @@ -1,60 +0,0 @@ -use super::error::{Result}; -use std::io::{Write}; -use tree_sitter::{TreeCursor}; - -pub fn print_tree(output: &mut Write, cursor: &mut TreeCursor, prints_position: bool) -> Result<()> { - let mut needs_newline = false; - let mut indent_level = 0; - let mut did_visit_children = false; - loop { - let node = cursor.node(); - let is_named = node.is_named(); - if did_visit_children { - if is_named { - output.write(b")")?; - needs_newline = true; - } - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; - } else { - break; - } - } else { - if is_named { - if needs_newline { - output.write(b"\n")?; - } - for _ in 0..indent_level { - output.write(b" ")?; - } - 
if let Some(field_name) = cursor.field_name() { - write!(output, "{}: ", field_name)?; - } - write!(output, "({}", node.kind())?; - if prints_position { - let start = node.start_position(); - let end = node.end_position(); - write!( - output, - " [{}, {}] - [{}, {}]", - start.row, - start.column, - end.row, - end.column - )?; - } - needs_newline = true; - } - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; - } else { - did_visit_children = true; - } - } - } - return Ok(()); -} diff --git a/cli/src/test.rs b/cli/src/test.rs index 7a08e80595..f742e887f2 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,5 +1,4 @@ use super::error::{Error, Result}; -use super::print::print_tree; use super::util; use ansi_term::Colour; use difference::{Changeset, Difference}; @@ -9,7 +8,7 @@ use regex::Regex; use std::char; use std::fs; use std::io::{self, Write}; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::str; use tree_sitter::{Language, LogType, Parser}; @@ -31,7 +30,6 @@ pub enum TestEntry { Group { name: String, children: Vec, - file_path: Option, }, Example { name: String, @@ -46,7 +44,6 @@ impl Default for TestEntry { TestEntry::Group { name: String::new(), children: Vec::new(), - file_path: None, } } } @@ -57,52 +54,43 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, - update: bool, ) -> Result<()> { - let test_entry = parse_tests(path, false)?; + let test_entry = parse_tests(path)?; let mut _log_session = None; let mut parser = Parser::new(); parser.set_language(language).map_err(|e| e.to_string())?; - if !update { - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); - } else if debug { - parser.set_logger(Some(Box::new(|log_type, message| { - if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); - } - write!(&mut io::stderr(), "{}\n", message).unwrap(); - }))); - } + if debug_graph { + _log_session = Some(util::log_graphs(&mut parser, 
"log.html")?); + } else if debug { + parser.set_logger(Some(Box::new(|log_type, message| { + if log_type == LogType::Lex { + io::stderr().write(b" ").unwrap(); + } + write!(&mut io::stderr(), "{}\n", message).unwrap(); + }))); } - let mut diffs = Vec::new(); - let mut update_entries = Vec::new(); - run_tests(&mut parser, test_entry, filter, update, &mut update_entries, -1, &mut diffs)?; + let mut failures = Vec::new(); + if let TestEntry::Group { children, .. } = test_entry { + for child in children { + run_tests(&mut parser, child, filter, 0, &mut failures)?; + } + } - if diffs.len() > 0 { + if failures.len() > 0 { println!(""); - let diff_name = if update { "update" } else { "failure" }; - if diffs.len() == 1 { - println!("1 {}:", diff_name) + if failures.len() == 1 { + println!("1 failure:") } else { - println!("{} {}s:", diffs.len(), diff_name) + println!("{} failures:", failures.len()) } - if update { - print_update_diff_key(); - } else { - print_diff_key(); - } - for (i, (name, parsed, provided)) in diffs.iter().enumerate() { + print_diff_key(); + for (i, (name, actual, expected)) in failures.iter().enumerate() { println!("\n {}. 
{}:", i + 1, name); - if update { - print_update_diff(provided, parsed); - } else { - print_diff(parsed, provided); - } + print_diff(actual, expected); } Error::err(String::new()) } else { @@ -111,40 +99,14 @@ pub fn run_tests_at_path( } pub fn print_diff_key() { - print_diff_key_with_colors("actual", "expected", Colour::Red, Colour::Green); -} - -fn print_update_diff_key() { - print_diff_key_with_colors("original", "updated", Colour::Yellow, Colour::Green); -} - -fn print_diff_key_with_colors( - actual_name: &str, - expected_name: &str, - actual_color: Colour, - expected_color: Colour, -) { println!( "\n{} / {}", - expected_color.paint(expected_name), - actual_color.paint(actual_name) + Colour::Green.paint("expected"), + Colour::Red.paint("actual") ); } pub fn print_diff(actual: &String, expected: &String) { - print_diff_with_colors(actual, expected, Colour::Red, Colour::Green); -} - -fn print_update_diff(actual: &String, expected: &String) { - print_diff_with_colors(actual, expected, Colour::Yellow, Colour::Green); -} - -fn print_diff_with_colors( - actual: &String, - expected: &String, - actual_color: Colour, - expected_color: Colour, -) { let changeset = Changeset::new(actual, expected, " "); print!(" "); for diff in &changeset.diffs { @@ -153,10 +115,10 @@ fn print_diff_with_colors( print!("{}{}", part, changeset.split); } Difference::Add(part) => { - print!("{}{}", expected_color.paint(part), changeset.split); + print!("{}{}", Colour::Green.paint(part), changeset.split); } Difference::Rem(part) => { - print!("{}{}", actual_color.paint(part), changeset.split); + print!("{}{}", Colour::Red.paint(part), changeset.split); } } } @@ -167,10 +129,8 @@ fn run_tests( parser: &mut Parser, test_entry: TestEntry, filter: Option<&str>, - update: bool, - update_entries: &mut Vec<(String, String, String)>, mut indent_level: i32, - diffs: &mut Vec<(String, String, String)>, + failures: &mut Vec<(String, String, String)>, ) -> Result<()> { match test_entry { 
TestEntry::Example { @@ -181,97 +141,39 @@ fn run_tests( } => { if let Some(filter) = filter { if !name.contains(filter) { - if update { - let input = String::from_utf8(input).unwrap(); - update_entries.push((name, input, output)); - } return Ok(()); } } let tree = parser.parse(&input, None).unwrap(); - let mut parsed = tree.root_node().to_sexp(); + let mut actual = tree.root_node().to_sexp(); if !has_fields { - parsed = strip_sexp_fields(parsed); + actual = strip_sexp_fields(actual); } for _ in 0..indent_level { print!(" "); } - let provided = normalize_sexp(&output); - if parsed == provided { + if actual == output { println!("✓ {}", Colour::Green.paint(&name)); - if update { - let input = String::from_utf8(input).unwrap(); - update_entries.push((name, input, output)); - } } else { - if update { - let input = String::from_utf8(input).unwrap(); - let mut fixed_output = Vec::new(); - let mut cursor = tree.walk(); - print_tree(&mut fixed_output, &mut cursor, false)?; - let fixed_output = String::from_utf8(fixed_output).unwrap(); - update_entries.push((name.clone(), input, fixed_output)); - println!("✓ {}", Colour::Yellow.paint(&name)); - } else { - println!("✗ {}", Colour::Red.paint(&name)); - } - diffs.push((name, parsed, provided)); + println!("✗ {}", Colour::Red.paint(&name)); + failures.push((name, actual, output)); } } - TestEntry::Group { name, children, file_path } => { - if indent_level >= 0 { - for _ in 0..indent_level { - print!(" "); - } - println!("{}:", name); + TestEntry::Group { name, children } => { + for _ in 0..indent_level { + print!(" "); } - - let diff_count = diffs.len(); - + println!("{}:", name); indent_level += 1; for child in children { - run_tests(parser, child, filter, update, update_entries, indent_level, diffs)?; + run_tests(parser, child, filter, indent_level, failures)?; } - - if let Some(file_path) = file_path { - if update && diffs.len() - diff_count > 0 { - write_tests(&file_path, &update_entries)?; - } - update_entries.clear(); - } 
- } - } - Ok(()) -} - -fn write_tests(file_path: &Path, update_entries: &Vec<(String, String, String)>) -> Result<()> { - let mut buffer = fs::File::create(file_path)?; - write_tests_to_buffer(&mut buffer, update_entries) -} - -fn write_tests_to_buffer( - buffer: &mut Write, - update_entries: &Vec<(String, String, String)>, -) -> Result<()> { - for (i, (name, input, output)) in update_entries.iter().enumerate() { - if i > 0 { - write!(buffer, "\n")?; } - write!( - buffer, - "{}\n{}\n{}\n{}\n{}\n\n{}\n", - "=".repeat(80), - name, - "=".repeat(80), - input, - "-".repeat(80), - output.trim() - )?; } Ok(()) } -pub fn parse_tests(path: &Path, norm_sexp: bool) -> io::Result { +pub fn parse_tests(path: &Path) -> io::Result { let name = path .file_stem() .and_then(|s| s.to_str()) @@ -287,13 +189,13 @@ pub fn parse_tests(path: &Path, norm_sexp: bool) -> io::Result { .unwrap_or("") .starts_with("."); if !hidden { - children.push(parse_tests(&entry.path(), norm_sexp)?); + children.push(parse_tests(&entry.path())?); } } - Ok(TestEntry::Group { name, children, file_path: None }) + Ok(TestEntry::Group { name, children }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content, Some(path.to_path_buf()), norm_sexp)) + Ok(parse_test_content(name, content)) } } @@ -301,12 +203,7 @@ pub fn strip_sexp_fields(sexp: String) -> String { SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() } -fn parse_test_content( - name: String, - content: String, - file_path: Option, - norm_sexp: bool, -) -> TestEntry { +fn parse_test_content(name: String, content: String) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut previous_name = String::new(); @@ -327,11 +224,8 @@ fn parse_test_content( ); if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) { let input = bytes[previous_header_end..divider_start].to_vec(); - let output = if norm_sexp { - normalize_sexp(output) - } else { - output.to_owned() - }; + let output 
= WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); + let output = output.replace(" )", ")"); let has_fields = SEXP_FIELD_REGEX.is_match(&output); children.push(TestEntry::Example { name: previous_name, @@ -347,13 +241,7 @@ fn parse_test_content( .to_string(); previous_header_end = header_end; } - TestEntry::Group { name, children, file_path } -} - -fn normalize_sexp(sexp: &str) -> String { - let sexp = WHITESPACE_REGEX.replace_all(sexp.trim(), " ").to_string(); - let sexp = sexp.replace(" )", ")"); - return sexp; + TestEntry::Group { name, children } } #[cfg(test)] @@ -385,8 +273,6 @@ d "# .trim() .to_string(), - None, - true, ); assert_eq!( @@ -406,49 +292,8 @@ d output: "(d)".to_string(), has_fields: false, }, - ], - file_path: None + ] } ); } - - #[test] - fn test_write_tests_to_buffer() { - let mut buffer = Vec::new(); - let update_entries = vec![ - ( - "title 1".to_string(), - "input 1".to_string(), - "output 1".to_string(), - ), - ( - "title 2".to_string(), - "input 2".to_string(), - "output 2".to_string(), - ), - ]; - write_tests_to_buffer(&mut buffer, &update_entries).unwrap(); - assert_eq!( - String::from_utf8(buffer).unwrap(), - r#" -================================================================================ -title 1 -================================================================================ -input 1 --------------------------------------------------------------------------------- - -output 1 - -================================================================================ -title 2 -================================================================================ -input 2 --------------------------------------------------------------------------------- - -output 2 -"# - .trim_start() - .to_string() - ); - } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index dae8754240..a8adce5f53 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -59,8 +59,8 @@ fn 
test_real_language_corpus_files() { let language = get_language(language_name); let corpus_dir = grammars_dir.join(language_name).join("corpus"); let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); - let main_tests = parse_tests(&corpus_dir, true).unwrap(); - let error_tests = parse_tests(&error_corpus_file, true).unwrap_or(TestEntry::default()); + let main_tests = parse_tests(&corpus_dir).unwrap(); + let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); let mut tests = flatten_tests(main_tests); tests.extend(flatten_tests(error_tests)); @@ -243,7 +243,7 @@ fn test_feature_corpus_files() { let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, &c_code, Some(&test_path)); - let test = parse_tests(&corpus_path, true).unwrap(); + let test = parse_tests(&corpus_path).unwrap(); let tests = flatten_tests(test); if !tests.is_empty() { @@ -381,7 +381,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children, .. } => { + TestEntry::Group { mut name, children } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); From d88dae7a3e15f8130a6b54f2f900ed2b109a7613 Mon Sep 17 00:00:00 2001 From: Ika Date: Fri, 6 Sep 2019 10:57:59 +0800 Subject: [PATCH 004/282] feat(cli): support snapshot testing with `--update` flag This PR adds an `--update` flag to the `tree-sitter test` command, which adds the ability to replace the _expected_ output in the corpus.txt with the _actual_ output produced by the parser, that is, we can now simply use this `--update` flag to write all the corresponding parser output back to the corpus.txt, and we just need to check the output without typing its actual sexp. - use the same output format as `tree-sitter parse`, except there won't be any position information printed. 
- the corpus.txt won't be touched if there's no difference between the _expected_ output and the _actual_ output in that file. - if there're differences between _expected_ and _actual_, _expected_ will be replaced by _actual_ and the whole file will be reformatted, i.e., all the output sexp will be formatted just like the output from `tree-sitter parse` and all the delimiters `===`/`---` will be normalized as 80-column long. - this flag also works with `--filter` flag. --- cli/src/main.rs | 4 +- cli/src/test.rs | 238 +++++++++++++++++++++++++++++++---- cli/src/tests/corpus_test.rs | 2 +- 3 files changed, 215 insertions(+), 29 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 59d04a97cf..84b13da8a8 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -82,6 +82,7 @@ fn run() -> error::Result<()> { .short("f") .takes_value(true), ) + .arg(Arg::with_name("update").long("update").short("u")) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) @@ -150,9 +151,10 @@ fn run() -> error::Result<()> { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); let filter = matches.value_of("filter"); + let update = matches.is_present("update"); let corpus_path = current_dir.join("corpus"); if let Some(language) = loader.languages_at_path(¤t_dir)?.first() { - test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter)?; + test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?; } else { eprintln!("No language found"); } diff --git a/cli/src/test.rs b/cli/src/test.rs index c0f5e6a740..c57d72ada1 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -6,9 +6,10 @@ use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; +use std::fmt::Write as FmtWrite; use std::fs; use std::io::{self, Write}; -use std::path::Path; +use 
std::path::{Path, PathBuf}; use std::str; use tree_sitter::{Language, LogType, Parser}; @@ -30,6 +31,7 @@ pub enum TestEntry { Group { name: String, children: Vec, + file_path: Option, }, Example { name: String, @@ -44,6 +46,7 @@ impl Default for TestEntry { TestEntry::Group { name: String::new(), children: Vec::new(), + file_path: None, } } } @@ -54,6 +57,7 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, + update: bool, ) -> Result<()> { let test_entry = parse_tests(path)?; let mut _log_session = None; @@ -72,27 +76,37 @@ pub fn run_tests_at_path( } let mut failures = Vec::new(); - if let TestEntry::Group { children, .. } = test_entry { - for child in children { - run_tests(&mut parser, child, filter, 0, &mut failures)?; - } - } + let mut corrected_entries = Vec::new(); + run_tests(&mut parser, test_entry, filter, 0, &mut failures, update, &mut corrected_entries)?; if failures.len() > 0 { println!(""); - if failures.len() == 1 { - println!("1 failure:") + if update { + if failures.len() == 1 { + println!("1 update:\n") + } else { + println!("{} updates:\n", failures.len()) + } + + for (i, (name, ..)) in failures.iter().enumerate() { + println!(" {}. {}", i + 1, name); + } + Ok(()) } else { - println!("{} failures:", failures.len()) - } + if failures.len() == 1 { + println!("1 failure:") + } else { + println!("{} failures:", failures.len()) + } - print_diff_key(); - for (i, (name, actual, expected)) in failures.iter().enumerate() { - println!("\n {}. {}:", i + 1, name); - print_diff(actual, expected); + print_diff_key(); + for (i, (name, actual, expected)) in failures.iter().enumerate() { + println!("\n {}. 
{}:", i + 1, name); + print_diff(actual, expected); + } + Error::err(String::new()) } - Error::err(String::new()) } else { Ok(()) } @@ -131,6 +145,8 @@ fn run_tests( filter: Option<&str>, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, + update: bool, + corrected_entries: &mut Vec<(String, String, String)>, ) -> Result<()> { match test_entry { TestEntry::Example { @@ -141,6 +157,11 @@ fn run_tests( } => { if let Some(filter) = filter { if !name.contains(filter) { + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } return Ok(()); } } @@ -154,22 +175,123 @@ fn run_tests( } if actual == output { println!("✓ {}", Colour::Green.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } } else { - println!("✗ {}", Colour::Red.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&actual); + corrected_entries.push((name.clone(), input, output)); + println!("✓ {}", Colour::Blue.paint(&name)); + } else { + println!("✗ {}", Colour::Red.paint(&name)); + } failures.push((name, actual, output)); } } - TestEntry::Group { name, children } => { - for _ in 0..indent_level { - print!(" "); + TestEntry::Group { name, children, file_path } => { + if indent_level > 0 { + for _ in 0..indent_level { + print!(" "); + } + println!("{}:", name); } - println!("{}:", name); + + let failure_count = failures.len(); + indent_level += 1; for child in children { - run_tests(parser, child, filter, indent_level, failures)?; + run_tests(parser, child, filter, indent_level, failures, update, corrected_entries)?; + } + + if let Some(file_path) = file_path { + if update && failures.len() - failure_count > 0 { + write_tests(&file_path, corrected_entries)?; + } + corrected_entries.clear(); + } + } + } + Ok(()) +} + +fn 
format_sexp(sexp: &String) -> String { + let mut formatted = String::new(); + + let mut indent_level = 0; + let mut has_field = false; + let mut s_iter = sexp.split(|c| c == ' ' || c == ')'); + while let Some(s) = s_iter.next() { + if s.is_empty() { + // ")" + indent_level -= 1; + write!(formatted, ")").unwrap(); + } else if s.starts_with('(') { + if has_field { + has_field = false; + } else { + if indent_level > 0 { + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + } + indent_level += 1; } + + // "(node_name" + write!(formatted, "{}", s).unwrap(); + + let mut c_iter = s.chars(); + c_iter.next(); + let second_char = c_iter.next().unwrap(); + if second_char == 'M' { + // "(MISSING node_name" + let s = s_iter.next().unwrap(); + write!(formatted, " {}", s).unwrap(); + } + } else if s.ends_with(':') { + // "field:" + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + write!(formatted, "{} ", s).unwrap(); + has_field = true; + indent_level += 1; } } + + formatted +} + +fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { + let mut buffer = fs::File::create(file_path)?; + write_tests_to_buffer(&mut buffer, corrected_entries) +} + +fn write_tests_to_buffer( + buffer: &mut Write, + corrected_entries: &Vec<(String, String, String)>, +) -> Result<()> { + for (i, (name, input, output)) in corrected_entries.iter().enumerate() { + if i > 0 { + write!(buffer, "\n")?; + } + write!( + buffer, + "{}\n{}\n{}\n{}\n{}\n\n{}\n", + "=".repeat(80), + name, + "=".repeat(80), + input, + "-".repeat(80), + output.trim() + )?; + } Ok(()) } @@ -188,10 +310,10 @@ pub fn parse_tests(path: &Path) -> io::Result { children.push(parse_tests(&entry.path())?); } } - Ok(TestEntry::Group { name, children }) + Ok(TestEntry::Group { name, children, file_path: None }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, 
content)) + Ok(parse_test_content(name, content, Some(path.to_path_buf()))) } } @@ -199,7 +321,7 @@ pub fn strip_sexp_fields(sexp: String) -> String { SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() } -fn parse_test_content(name: String, content: String) -> TestEntry { +fn parse_test_content(name: String, content: String, file_path: Option) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut prev_name = String::new(); @@ -250,7 +372,7 @@ fn parse_test_content(name: String, content: String) -> TestEntry { .to_string(); prev_header_end = header_end; } - TestEntry::Group { name, children } + TestEntry::Group { name, children, file_path } } #[cfg(test)] @@ -282,6 +404,7 @@ d "# .trim() .to_string(), + None, ); assert_eq!( @@ -301,7 +424,8 @@ d output: "(d)".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } @@ -334,6 +458,7 @@ abc "# .trim() .to_string(), + None, ); assert_eq!( @@ -353,8 +478,67 @@ abc output: "(c (d))".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } + + #[test] + fn test_format_sexp() { + assert_eq!( + format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()), + r#" +(a + b: (c) + (d) + e: (f + (g + (h + (MISSING i))))) +"# + .trim() + .to_string() + ); + } + + #[test] + fn test_write_tests_to_buffer() { + let mut buffer = Vec::new(); + let corrected_entries = vec![ + ( + "title 1".to_string(), + "input 1".to_string(), + "output 1".to_string(), + ), + ( + "title 2".to_string(), + "input 2".to_string(), + "output 2".to_string(), + ), + ]; + write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); + assert_eq!( + String::from_utf8(buffer).unwrap(), + r#" +================================================================================ +title 1 +================================================================================ +input 1 +-------------------------------------------------------------------------------- + +output 1 + 
+================================================================================ +title 2 +================================================================================ +input 2 +-------------------------------------------------------------------------------- + +output 2 +"# + .trim_start() + .to_string() + ); + } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index a8adce5f53..ed6226f28e 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -381,7 +381,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children } => { + TestEntry::Group { mut name, children, .. } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); From f191858bae95d2daab860d8104a9e9f60cd1d351 Mon Sep 17 00:00:00 2001 From: Ika Date: Thu, 3 Oct 2019 21:24:17 +0800 Subject: [PATCH 005/282] fix: handle UNEXPECTED node --- cli/src/test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index c57d72ada1..544ec2495d 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -248,8 +248,8 @@ fn format_sexp(sexp: &String) -> String { let mut c_iter = s.chars(); c_iter.next(); let second_char = c_iter.next().unwrap(); - if second_char == 'M' { - // "(MISSING node_name" + if second_char == 'M' || second_char == 'U' { + // "(MISSING node_name" or "(UNEXPECTED 'x'" let s = s_iter.next().unwrap(); write!(formatted, " {}", s).unwrap(); } From 1635aab801152b8b36975dbc6cd02ea01ddbf778 Mon Sep 17 00:00:00 2001 From: Andy Pan Date: Sat, 11 Apr 2020 23:21:59 +0800 Subject: [PATCH 006/282] Fix exporting get/set timeout in web binding (#592) * Fix exporting get/set timeout in web binding Add two symbols "_ts_parser_set_timeout_micros", "_ts_parser_timeout_micros" due to usage in `tree-sitter.js`. 
* Fix getTimeoutMicros() not returning the value --- lib/binding_web/binding.js | 4 ++-- lib/binding_web/exports.json | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 96f6ef082f..f52f61d55f 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -129,8 +129,8 @@ class Parser { C._ts_parser_set_timeout_micros(this[0], timeout); } - getTimeoutMicros(timeout) { - C._ts_parser_timeout_micros(this[0]); + getTimeoutMicros() { + return C._ts_parser_timeout_micros(this[0]); } setLogger(callback) { diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 33fbad7a53..2c63824900 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -68,6 +68,8 @@ "_ts_parser_new_wasm", "_ts_parser_parse_wasm", "_ts_parser_set_language", + "_ts_parser_set_timeout_micros", + "_ts_parser_timeout_micros", "_ts_query_capture_count", "_ts_query_capture_name_for_id", "_ts_query_captures_wasm", From af498bc0c37e212a3823d10c2cd87d64aa06d434 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Tue, 21 Apr 2020 00:05:01 -0400 Subject: [PATCH 007/282] Add a simple Makefile-based build system. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This produces static/shared libraries as well as a pkg-config file, and installs them to the standard Unix hierarchy. GNU Make is assumed as features like $(wildcard ...) or $(shell uname) may not work elsewhere, but it should otherwise build on most/all Unix platforms. 
Note that we assume the following POSIX default rules/macros exist, so we don't bother redefining them: - pattern rules for compiling .c -> .o - $(CC) - $(AR) Special note on the .pc file generation: we do most variable replacements in one go, but delay @PREFIX@ so that we can first find existing substring instances of the prefix value (if libdir/includedir reside within the prefix), and update them to use a literal pkg-config variable '${prefix}'. This is fairly compact (one single sed) while still letting us produce pkg-config files that support runtime redefinition à la cross-compilation. --- Makefile | 71 +++++++++++++++++++++++++++++++++++++++++++++++ tree-sitter.pc.in | 10 +++++++ 2 files changed, 81 insertions(+) create mode 100644 Makefile create mode 100644 tree-sitter.pc.in diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..95f53b9abe --- /dev/null +++ b/Makefile @@ -0,0 +1,71 @@ +VERSION := 0.6.3 + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# collect sources +ifneq ($(AMALGAMATED),1) + SRC := $(wildcard lib/src/*.c) + # do not double-include amalgamation + SRC := $(filter-out lib/src/lib.c,$(SRC)) +else + # use amalgamated build + SRC := lib/src/lib.c +endif +OBJ := $(SRC:.c=.o) + +# define default flags, and override to append mandatory flags +CFLAGS ?= -O3 +override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include + +# ABI versioning +SONAME_MAJOR := 0 +SONAME_MINOR := 0 + +# OS-specific bits +ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) +endif +ifneq (,$(filter $(shell 
uname),FreeBSD NetBSD DragonFly)) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: libtree-sitter.a libtree-sitter.$(SOEXTVER) + +libtree-sitter.a: $(OBJ) + $(AR) rcs $@ $^ + +libtree-sitter.$(SOEXTVER): $(OBJ) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ + ln -sf $@ libtree-sitter.$(SOEXT) + ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) + +install: all + install -d '$(DESTDIR)$(LIBDIR)' + install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + install -d '$(DESTDIR)$(PCLIBDIR)' + sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' \ + tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +clean: + rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) + +.PHONY: all install clean diff --git a/tree-sitter.pc.in b/tree-sitter.pc.in new file mode 100644 index 0000000000..f98816cb7b --- /dev/null +++ b/tree-sitter.pc.in @@ -0,0 +1,10 @@ +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: tree-sitter +Description: An incremental parsing system for programming tools +URL: https://tree-sitter.github.io/ +Version: @VERSION@ +Libs: -L${libdir} -ltree-sitter +Cflags: -I${includedir} From b4c252051d884b82d721d65200b9d4a8678e50fd Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Tue, 21 Apr 2020 22:28:26 -0400 Subject: [PATCH 008/282] add Make-based build to travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 
0923033a06..98ca9ccf40 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,9 @@ script: # Build the WASM binding - (eval "$WASM_ENV" && script/build-wasm) + # build the shared/static libraries + - make + # Build the CLI - cargo build --release From 780e9cecc9ee1e707d8f45fd0d93d4ee07325725 Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Tue, 28 Apr 2020 13:38:08 +0200 Subject: [PATCH 009/282] Do not use multiple unnamed structs inside of unions --- cli/src/generate/render.rs | 2 +- lib/include/tree_sitter/parser.h | 50 ++++++++++++++++++++------------ lib/src/language.c | 4 +-- lib/src/language.h | 2 +- lib/src/parser.c | 28 +++++++++--------- lib/src/subtree.c | 12 ++++---- 6 files changed, 56 insertions(+), 42 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index e8c59d07b3..67cd2fbed0 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1022,7 +1022,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.count = {}, .reusable = {}}},", + " [{}] = {{ .entry = {{.count = {}, .reusable = {}}} }},", i, entry.actions.len(), entry.reusable diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 9df91f8c3c..11bf4fc42a 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -62,13 +62,13 @@ typedef struct { TSStateId state; bool extra : 1; bool repetition : 1; - }; + } shift; struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; uint8_t production_id; - }; + } reduce; } params; TSParseActionType type : 4; } TSParseAction; @@ -83,7 +83,7 @@ typedef union { struct { uint8_t count; bool reusable : 1; - }; + } entry; } TSParseActionEntry; struct TSLanguage { @@ -167,22 +167,28 @@ struct TSLanguage { #define ACTIONS(id) id -#define SHIFT(state_value) \ - { \ - { \ - .type = TSParseActionTypeShift, \ - .params = {.state = state_value}, \ - } \ +#define SHIFT(state_value) \ + { \ + { \ + .params 
= { \ + .shift = { \ + .state = state_value \ + } \ + }, \ + .type = TSParseActionTypeShift \ + } \ } #define SHIFT_REPEAT(state_value) \ { \ { \ - .type = TSParseActionTypeShift, \ .params = { \ - .state = state_value, \ - .repetition = true \ + .shift = { \ + .state = state_value, \ + .repetition = true \ + } \ }, \ + .type = TSParseActionTypeShift \ } \ } @@ -194,20 +200,26 @@ struct TSLanguage { #define SHIFT_EXTRA() \ { \ { \ - .type = TSParseActionTypeShift, \ - .params = {.extra = true} \ + .params = { \ + .shift = { \ + .extra = true \ + } \ + }, \ + .type = TSParseActionTypeShift \ } \ } #define REDUCE(symbol_val, child_count_val, ...) \ { \ { \ - .type = TSParseActionTypeReduce, \ .params = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - } \ + .reduce = { \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }, \ + .type = TSParseActionTypeReduce \ } \ } diff --git a/lib/src/language.c b/lib/src/language.c index a396b4b0b6..c00c49e3c0 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -33,8 +33,8 @@ void ts_language_table_entry( assert(symbol < self->token_count); uint32_t action_index = ts_language_lookup(self, state, symbol); const TSParseActionEntry *entry = &self->parse_actions[action_index]; - result->action_count = entry->count; - result->is_reusable = entry->reusable; + result->action_count = entry->entry.count; + result->is_reusable = entry->entry.reusable; result->actions = (const TSParseAction *)(entry + 1); } } diff --git a/lib/src/language.h b/lib/src/language.h index f908b4593a..341f0f85af 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -93,7 +93,7 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self, if (count > 0) { TSParseAction action = actions[count - 1]; if (action.type == TSParseActionTypeShift) { - return action.params.extra ? state : action.params.state; + return action.params.shift.extra ? 
state : action.params.shift.state; } } return 0; diff --git a/lib/src/parser.c b/lib/src/parser.c index d4b227308b..19add152f1 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -951,15 +951,15 @@ static bool ts_parser__do_all_potential_reductions( switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: - if (!action.params.extra && !action.params.repetition) has_shift_action = true; + if (!action.params.shift.extra && !action.params.shift.repetition) has_shift_action = true; break; case TSParseActionTypeReduce: - if (action.params.child_count > 0) + if (action.params.reduce.child_count > 0) ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ - .symbol = action.params.symbol, - .count = action.params.child_count, - .dynamic_precedence = action.params.dynamic_precedence, - .production_id = action.params.production_id, + .symbol = action.params.reduce.symbol, + .count = action.params.reduce.child_count, + .dynamic_precedence = action.params.reduce.dynamic_precedence, + .production_id = action.params.reduce.production_id, }); default: break; @@ -1250,7 +1250,7 @@ static void ts_parser__recover( // be counted in error cost calculations. 
unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra) { + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.shift.extra) { MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_extra(&mutable_lookahead); lookahead = ts_subtree_from_mut(mutable_lookahead); @@ -1379,9 +1379,9 @@ static bool ts_parser__advance( switch (action.type) { case TSParseActionTypeShift: { - if (action.params.repetition) break; + if (action.params.shift.repetition) break; TSStateId next_state; - if (action.params.extra) { + if (action.params.shift.extra) { // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out. if (state == ERROR_STATE) continue; @@ -1389,7 +1389,7 @@ static bool ts_parser__advance( next_state = state; LOG("shift_extra"); } else { - next_state = action.params.state; + next_state = action.params.shift.state; LOG("shift state:%u", next_state); } @@ -1398,7 +1398,7 @@ static bool ts_parser__advance( next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); } - ts_parser__shift(self, version, next_state, lookahead, action.params.extra); + ts_parser__shift(self, version, next_state, lookahead, action.params.shift.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); return true; } @@ -1406,10 +1406,10 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; bool is_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( - self, version, action.params.symbol, action.params.child_count, - 
action.params.dynamic_precedence, action.params.production_id, + self, version, action.params.reduce.symbol, action.params.reduce.child_count, + action.params.reduce.dynamic_precedence, action.params.reduce.production_id, is_fragile, is_extra ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/subtree.c b/lib/src/subtree.c index b98f172339..ef92a32fe4 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -21,7 +21,7 @@ typedef struct { #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX #define TS_MAX_TREE_POOL_SIZE 32 -static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}}; +static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; // ExternalScannerState @@ -208,7 +208,7 @@ Subtree ts_subtree_new_leaf( .has_external_tokens = has_external_tokens, .is_missing = false, .is_keyword = is_keyword, - .first_leaf = {.symbol = 0, .parse_state = 0}, + {{.first_leaf = {.symbol = 0, .parse_state = 0}}} }; return (Subtree) {.ptr = data}; } @@ -464,15 +464,17 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .production_id = production_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, .fragile_left = fragile, .fragile_right = fragile, .is_keyword = false, - .node_count = 0, - .first_leaf = {.symbol = 0, .parse_state = 0}, + {{ + .node_count = 0, + .production_id = production_id, + .first_leaf = {.symbol = 0, .parse_state = 0}, + }} }; MutableSubtree result = {.ptr = data}; ts_subtree_set_children(result, children->contents, children->size, language); From a04d688d3895e17bf6111722aa57b7148d8f9df1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 6 May 2020 09:34:23 -0700 Subject: [PATCH 010/282] docs: Add a link to tree-sitter-wasm Closes #605 --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 052e928090..8551d1ebcf 100644 --- 
a/docs/index.md +++ b/docs/index.md @@ -51,6 +51,7 @@ Parsers for these languages are fairly complete: * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) * [Vue](https://github.com/ikatyang/tree-sitter-vue) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) +* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) Parsers for these languages are in development: From 3456a21f0de48bac180dcfd1e48bc5c4dddb3976 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 6 May 2020 12:47:32 -0700 Subject: [PATCH 011/282] Start work on restructuring query implementation to deal w/ optionals and repeats better --- .gitignore | 1 + cli/src/tests/query_test.rs | 109 +++++++++++ lib/src/query.c | 369 ++++++++++++++++++++++-------------- 3 files changed, 334 insertions(+), 145 deletions(-) diff --git a/.gitignore b/.gitignore index ed31e54a91..58d73daaba 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ docs/assets/js/tree-sitter.js /target *.rs.bk *.a +*.dylib *.o *.obj *.exp diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index f69074a800..4293c568a9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -608,6 +608,115 @@ fn test_query_matches_with_repeated_leaf_nodes() { }); } +#[test] +fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + (* + (comment)+? @doc + . 
+ (function_declaration + name: (identifier) @name)) + ", + ) + .unwrap(); + + let source = " + function a() { + // one + var b; + + function c() {} + + // two + // three + var d; + + // four + // five + function e() { + + } + } + + // six + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("name", "a")]), + (0, vec![("name", "c")]), + ( + 0, + vec![("doc", "// four"), ("doc", "// five"), ("name", "e")] + ), + ] + ); + }); +} + +#[test] +fn test_query_matches_with_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + (class_declaration + name: (identifier) @class + (class_heritage + (identifier) @superclass)?) + ", + ) + .unwrap(); + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + + let source = " + class A {} + "; + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[(0, vec![("class", "A")]),] + ); + + let source = " + class A {} + class B extends C {} + class D extends (E.F) {} + "; + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_matches(matches, &query, source), + &[ + (0, vec![("class", "A")]), + (0, vec![("class", "B"), ("superclass", "C")]), + (0, vec![("class", "D")]), + ] + ); + }); +} + #[test] fn test_query_matches_with_repeated_internal_nodes() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 87ab05b5dd..d67bd43de3 100644 
--- a/lib/src/query.c +++ b/lib/src/query.c @@ -35,21 +35,20 @@ typedef struct { * captured in this pattern. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. - * - `repeat_step_index` - If this step is part of a repetition, the index of - * the beginning of the repetition. A `NONE` value means this step is not - * part of a repetition. + * - `alternative_index` - TODO doc */ typedef struct { TSSymbol symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t repeat_step_index; - uint16_t depth: 11; + uint16_t alternative_index; + uint8_t depth; bool contains_captures: 1; bool is_pattern_start: 1; bool is_immediate: 1; bool is_last: 1; - bool is_repeated: 1; + bool is_placeholder: 1; + bool alternative_is_immediate: 1; } QueryStep; /* @@ -96,10 +95,8 @@ typedef struct { uint16_t pattern_index; uint16_t step_index; uint16_t consumed_capture_count; - uint16_t repeat_match_count; - uint16_t step_index_on_failure; uint8_t capture_list_id; - bool seeking_non_match; + bool seeking_immediate_match: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -417,12 +414,13 @@ static QueryStep query_step__new( .depth = depth, .field = 0, .capture_ids = {NONE, NONE, NONE, NONE}, + .alternative_index = NONE, .contains_captures = false, - .is_repeated = false, .is_last = false, .is_pattern_start = false, + .is_placeholder = false, .is_immediate = is_immediate, - .repeat_step_index = NONE, + .alternative_is_immediate = false, }; } @@ -511,13 +509,14 @@ static inline bool ts_query__pattern_map_search( static inline void ts_query__pattern_map_insert( TSQuery *self, TSSymbol symbol, - uint32_t start_step_index + uint32_t start_step_index, + uint32_t pattern_index ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); array_insert(&self->pattern_map, index, ((PatternEntry) { .step_index = start_step_index, - .pattern_index = self->pattern_map.size, + .pattern_index = 
pattern_index, })); } @@ -863,8 +862,17 @@ static TSQueryError ts_query__parse_pattern( if (stream->next == '+') { stream_advance(stream); - step->is_repeated = true; - array_back(&self->steps)->repeat_step_index = starting_step_index; + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_placeholder = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + stream_skip_whitespace(stream); + } + + else if (stream->next == '?') { + stream_advance(stream); + step->alternative_index = self->steps.size; stream_skip_whitespace(stream); } @@ -950,6 +958,7 @@ TSQuery *ts_query_new( Stream stream = stream_new(source, source_len); stream_skip_whitespace(&stream); while (stream.input < stream.end) { + uint32_t pattern_index = self->predicates_by_pattern.size; uint32_t start_step_index = self->steps.size; uint32_t capture_count = 0; array_push(&self->start_bytes_by_pattern, stream.input - source); @@ -980,14 +989,18 @@ TSQuery *ts_query_new( } // Maintain a map that can look up patterns for a given root symbol. 
- self->steps.contents[start_step_index].is_pattern_start = true; - ts_query__pattern_map_insert( - self, - self->steps.contents[start_step_index].symbol, - start_step_index - ); - if (self->steps.contents[start_step_index].symbol == WILDCARD_SYMBOL) { - self->wildcard_root_pattern_count++; + for (;;) { + QueryStep *step = &self->steps.contents[start_step_index]; + step->is_pattern_start = true; + ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); + if (step->symbol == WILDCARD_SYMBOL) { + self->wildcard_root_pattern_count++; + } + if (step->alternative_index != NONE) { + start_step_index = step->alternative_index; + } else { + break; + } } } @@ -1191,22 +1204,84 @@ static bool ts_query_cursor__first_in_progress_capture( return result; } -static bool ts_query__cursor_add_state( +// Determine which node is first in a depth-first traversal +int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { + if (left.id != right.id) { + uint32_t left_start = ts_node_start_byte(left); + uint32_t right_start = ts_node_start_byte(right); + if (left_start < right_start) return -1; + if (left_start > right_start) return 1; + uint32_t left_node_count = ts_node_end_byte(left); + uint32_t right_node_count = ts_node_end_byte(right); + if (left_node_count > right_node_count) return -1; + if (left_node_count < right_node_count) return 1; + } + return 0; +} + +// Determine if either state contains a superset of the other state's captures. +void ts_query_cursor__compare_captures( TSQueryCursor *self, - const PatternEntry *pattern + QueryState *left_state, + QueryState *right_state, + bool *left_contains_right, + bool *right_contains_left ) { - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - - // If this pattern begins with a repetition, then avoid creating - // new states after already matching the repetition one or more times. - // The query should only one match for the repetition - the one that - // started the earliest. 
- if (step->is_repeated) { - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; - if (state->step_index == pattern->step_index) return true; + CaptureList *left_captures = capture_list_pool_get( + &self->capture_list_pool, + left_state->capture_list_id + ); + CaptureList *right_captures = capture_list_pool_get( + &self->capture_list_pool, + right_state->capture_list_id + ); + *left_contains_right = true; + *right_contains_left = true; + unsigned i = 0, j = 0; + for (;;) { + if (i < left_captures->size) { + if (j < right_captures->size) { + TSQueryCapture *left = &left_captures->contents[i]; + TSQueryCapture *right = &right_captures->contents[j]; + if (left->node.id == right->node.id && left->index == right->index) { + i++; + j++; + } else { + switch (ts_query_cursor__compare_nodes(left->node, right->node)) { + case -1: + *right_contains_left = false; + i++; + break; + case 1: + *left_contains_right = false; + j++; + break; + default: + *right_contains_left = false; + *left_contains_right = false; + i++; + j++; + break; + } + } + } else { + *right_contains_left = false; + break; + } + } else { + if (j < right_captures->size) { + *left_contains_right = false; + } + break; } } +} + +static bool ts_query_cursor__add_state( + TSQueryCursor *self, + const PatternEntry *pattern +) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool); @@ -1244,21 +1319,23 @@ static bool ts_query__cursor_add_state( .pattern_index = pattern->pattern_index, .start_depth = self->depth - step->depth, .consumed_capture_count = 0, - .repeat_match_count = 0, - .step_index_on_failure = NONE, - .seeking_non_match = false, + .seeking_immediate_match = false, })); return true; } +// Duplicate the given state and insert the newly-created state immediately after +// the given state in the `states` array. 
static QueryState *ts_query__cursor_copy_state( TSQueryCursor *self, const QueryState *state ) { uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool); if (new_list_id == NONE) return NULL; - array_push(&self->states, *state); - QueryState *new_state = array_back(&self->states); + uint32_t index = (state - self->states.contents) + 1; + QueryState copy = *state; + array_insert(&self->states, index, copy); + QueryState *new_state = &self->states.contents[index]; new_state->capture_list_id = new_list_id; CaptureList *old_captures = capture_list_pool_get( &self->capture_list_pool, @@ -1304,7 +1381,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { self->states.contents[i - deleted_count] = *state; } } - self->states.size -= deleted_count; if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { @@ -1329,8 +1405,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { symbol = self->query->symbol_map[symbol]; } - // If this node is before the selected range, then avoid descending - // into it. + // If this node is before the selected range, then avoid descending into it. if ( ts_node_end_byte(node) <= self->start_byte || point_lte(ts_node_end_point(node), self->start_point) @@ -1369,7 +1444,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, pattern)) break; + if (!ts_query_cursor__add_state(self, pattern)) break; } // Add new states for any patterns whose root node matches this node. @@ -1381,7 +1456,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. 
if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, pattern)) break; + if (!ts_query_cursor__add_state(self, pattern)) break; // Advance to the next pattern whose root node matches this node. i++; @@ -1392,7 +1467,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // Update all of the in-progress states with current node. - for (unsigned i = 0, n = self->states.size; i < n; i++) { + for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; QueryStep *step = &self->query->steps.contents[state->step_index]; @@ -1408,7 +1483,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { step->symbol == WILDCARD_SYMBOL || (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); bool later_sibling_can_match = has_later_siblings; - if (step->is_immediate && is_named) { + if ((step->is_immediate && is_named) || state->seeking_immediate_match) { later_sibling_can_match = false; } if (step->is_last && has_later_siblings) { @@ -1425,24 +1500,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } if (!node_does_match) { - // If this QueryState has processed a repeating sequence, and that repeating - // sequence has ended, move on to the *next* step of this state's pattern. - if ( - state->step_index_on_failure != NONE && - (!later_sibling_can_match || step->is_repeated) - ) { - LOG( - " finish repetition state. pattern:%u, step:%u\n", - state->pattern_index, - state->step_index - ); - state->step_index = state->step_index_on_failure; - state->step_index_on_failure = NONE; - state->repeat_match_count = 0; - i--; - continue; - } - if (!later_sibling_can_match) { LOG( " discard state. 
pattern:%u, step:%u\n", @@ -1455,114 +1512,136 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ); array_erase(&self->states, i); i--; - n--; } - - state->seeking_non_match = false; continue; } - // The `seeking_non_match` flag indicates that a previous QueryState - // has already begun processing this repeating sequence, so that *this* - // QueryState should not begin matching until a separate repeating sequence - // is found. - if (state->seeking_non_match) continue; - - // Some patterns can match their root node in multiple ways, - // capturing different children. If this pattern step could match - // later children within the same parent, then this query state - // cannot simply be updated in place. It must be split into two - // states: one that matches this node, and one which skips over - // this node, to preserve the possibility of matching later - // siblings. - QueryState *next_state = state; + // Some patterns can match their root node in multiple ways, capturing different + // children. If this pattern step could match later children within the same + // parent, then this query state cannot simply be updated in place. It must be + // split into two states: one that matches this node, and one which skips over + // this node, to preserve the possibility of matching later siblings. if ( - !step->is_pattern_start && - step->contains_captures && later_sibling_can_match && - state->repeat_match_count == 0 + !step->is_pattern_start && + step->contains_captures ) { - QueryState *copy = ts_query__cursor_copy_state(self, state); - - // The QueryState that matched this node has begun matching a repeating - // sequence. The QueryState that *skipped* this node should not start - // matching later elements of the same repeating sequence. - if (step->is_repeated) { - state->seeking_non_match = true; - } - - if (copy) { + if (ts_query__cursor_copy_state(self, state)) { LOG( - " split state. 
pattern:%u, step:%u\n", - copy->pattern_index, - copy->step_index + " split state for capture. pattern:%u, step:%u\n", + state->pattern_index, + state->step_index ); - next_state = copy; - } else { - LOG(" cannot split state.\n"); + i++; } } - // If the current node is captured in this pattern, add it to the - // capture list. + // If the current node is captured in this pattern, add it to the capture list. + CaptureList *capture_list = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { uint16_t capture_id = step->capture_ids[j]; if (step->capture_ids[j] == NONE) break; - CaptureList *capture_list = capture_list_pool_get( - &self->capture_list_pool, - next_state->capture_list_id - ); - array_push(capture_list, ((TSQueryCapture) { - node, - capture_id - })); + array_push(capture_list, ((TSQueryCapture) { node, capture_id })); LOG( " capture node. pattern:%u, capture_id:%u, capture_count:%u\n", - next_state->pattern_index, + state->pattern_index, capture_id, capture_list->size ); } - // If this is the end of a repetition, then jump back to the beginning - // of that repetition. - if (step->repeat_step_index != NONE) { - next_state->step_index_on_failure = next_state->step_index + 1; - next_state->step_index = step->repeat_step_index; - next_state->repeat_match_count++; - LOG( - " continue repeat. pattern:%u, match_count:%u\n", - next_state->pattern_index, - next_state->repeat_match_count - ); - } else { - next_state->step_index++; - LOG( - " advance state. pattern:%u, step:%u\n", - next_state->pattern_index, - next_state->step_index - ); + // Advance this state to the next step of its pattern. + state->step_index++; + state->seeking_immediate_match = false; + LOG( + " advance state. 
pattern:%u, step:%u\n", + state->pattern_index, + state->step_index + ); - QueryStep *next_step = step + 1; - - // If the pattern is now done, then remove it from the list of - // in-progress states, and add it to the list of finished states. - if (next_step->depth == PATTERN_DONE_MARKER) { - LOG(" finish pattern %u\n", next_state->pattern_index); - - next_state->id = self->next_state_id++; - array_push(&self->finished_states, *next_state); - if (next_state == state) { - array_erase(&self->states, i); - i--; - n--; - } else { - self->states.size--; + // If this state's next step has an 'alternative' step (the step is either optional, + // or is the end of a repetition), then copy the state in order to pursue both + // alternatives. The alternative step itself may have an alternative, so this is + // an interative process. + for (unsigned j = i, end_index = i + 1; j < end_index; j++) { + QueryState *state = &self->states.contents[j]; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->alternative_index != NONE) { + QueryState *copy = ts_query__cursor_copy_state(self, state); + if (copy) { + i++; + end_index++; + copy->step_index = next_step->alternative_index; + if (next_step->alternative_is_immediate) { + copy->seeking_immediate_match = true; + } + LOG( + " split state for branch. 
pattern:%u, step:%u\n", + copy->pattern_index, + copy->step_index + ); + } + if (next_step->is_placeholder) { + state->step_index++; + j--; } } } } + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i]; + + for (unsigned j = i + 1; j < self->states.size; j++) { + QueryState *other_state = &self->states.contents[j]; + if ( + state->pattern_index == other_state->pattern_index && + state->start_depth == other_state->start_depth && + state->step_index == other_state->step_index + ) { + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right || right_contains_left) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + if (right_contains_left) { + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, i); + i--; + j--; + break; + } else if (left_contains_right) { + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, j); + j--; + } + } + } + } + + // If there the state is at the end of its pattern, remove it from the list + // of in-progress states and add it to the list of finished states. + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->depth == PATTERN_DONE_MARKER) { + LOG(" finish pattern %u\n", state->pattern_index); + state->id = self->next_state_id++; + array_push(&self->finished_states, *state); + array_erase(&self->states, i); + i--; + } + } // Continue descending if possible. 
if (ts_tree_cursor_goto_first_child(&self->cursor)) { From 1011be76b7cdb97ee7534d72ceb0687fa49b6ebd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 May 2020 10:41:29 -0700 Subject: [PATCH 012/282] Handle trailing optional nodes in queries --- lib/src/query.c | 73 +++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index d67bd43de3..f33b646dca 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -97,6 +97,7 @@ typedef struct { uint16_t consumed_capture_count; uint8_t capture_list_id; bool seeking_immediate_match: 1; + bool skipped_trailing_optional: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -1358,20 +1359,31 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if (self->ascending) { LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); - // When leaving a node, remove any unfinished states whose next step - // needed to match something within that node. + // When leaving a node, remove any states that cannot make further progress. uint32_t deleted_count = 0; for (unsigned i = 0, n = self->states.size; i < n; i++) { QueryState *state = &self->states.contents[i]; QueryStep *step = &self->query->steps.contents[state->step_index]; - if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) { + // If a state completed its pattern inside of this node, but was deferred from finishing + // in order to search for longer matches, mark it as finished. + if (step->depth == PATTERN_DONE_MARKER) { + if (state->start_depth == self->depth) { + LOG(" finish pattern %u\n", state->pattern_index); + state->id = self->next_state_id++; + array_push(&self->finished_states, *state); + deleted_count++; + } + } + + // If a state needed to match something within this node, then remove that state + // as it has failed to match. 
+ else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) { LOG( " failed to match. pattern:%u, step:%u\n", state->pattern_index, state->step_index ); - capture_list_pool_release( &self->capture_list_pool, state->capture_list_id @@ -1383,6 +1395,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } self->states.size -= deleted_count; + // Leave this node by stepping to its next sibling or to its parent. if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { self->ascending = false; } else if (ts_tree_cursor_goto_parent(&self->cursor)) { @@ -1391,11 +1404,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { return self->finished_states.size > 0; } } else { - bool has_later_siblings; + bool can_have_later_siblings; bool can_have_later_siblings_with_this_field; TSFieldId field_id = ts_tree_cursor_current_status( &self->cursor, - &has_later_siblings, + &can_have_later_siblings, &can_have_later_siblings_with_this_field ); TSNode node = ts_tree_cursor_current_node(&self->cursor); @@ -1423,17 +1436,12 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ) return false; LOG( - "enter node. " - "type:%s, field:%s, row:%u state_count:%u, " - "finished_state_count:%u, has_later_siblings:%d, " - "can_have_later_siblings_with_this_field:%d\n", + "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", ts_node_type(node), ts_language_field_name_for_id(self->query->language, field_id), ts_node_start_point(node).row, self->states.size, - self->finished_states.size, - has_later_siblings, - can_have_later_siblings_with_this_field + self->finished_states.size ); // Add new states for any patterns whose root node is a wildcard. 
@@ -1482,11 +1490,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { step->symbol == symbol || step->symbol == WILDCARD_SYMBOL || (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); - bool later_sibling_can_match = has_later_siblings; + bool later_sibling_can_match = can_have_later_siblings; if ((step->is_immediate && is_named) || state->seeking_immediate_match) { later_sibling_can_match = false; } - if (step->is_last && has_later_siblings) { + if (step->is_last && can_have_later_siblings) { node_does_match = false; } if (step->field) { @@ -1499,6 +1507,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } } + // Remove states immediately if it is ever clear that they cannot match. if (!node_does_match) { if (!later_sibling_can_match) { LOG( @@ -1566,11 +1575,17 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // or is the end of a repetition), then copy the state in order to pursue both // alternatives. The alternative step itself may have an alternative, so this is // an interative process. - for (unsigned j = i, end_index = i + 1; j < end_index; j++) { + unsigned start_index = state - self->states.contents; + unsigned end_index = start_index + 1; + for (unsigned j = start_index; j < end_index; j++) { QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->alternative_index != NONE) { QueryState *copy = ts_query__cursor_copy_state(self, state); + if (next_step->is_placeholder) { + state->step_index++; + j--; + } if (copy) { i++; end_index++; @@ -1579,15 +1594,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { copy->seeking_immediate_match = true; } LOG( - " split state for branch. pattern:%u, step:%u\n", + " split state for branch. 
pattern:%u, step:%u, step:%u\n", copy->pattern_index, + state->step_index, copy->step_index ); } - if (next_step->is_placeholder) { - state->step_index++; - j--; - } + } else if (next_step->depth == PATTERN_DONE_MARKER && j > start_index) { + state->skipped_trailing_optional = true; } } } @@ -1595,6 +1609,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; + // Enfore the longest-match criteria. When a query pattern contains optional or + // repeated nodes, this is necesssary to avoid multiple redundant states, where + // one state has a strict subset of another state's captures. for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; if ( @@ -1635,11 +1652,15 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // of in-progress states and add it to the list of finished states. QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->depth == PATTERN_DONE_MARKER) { - LOG(" finish pattern %u\n", state->pattern_index); - state->id = self->next_state_id++; - array_push(&self->finished_states, *state); - array_erase(&self->states, i); - i--; + if (state->skipped_trailing_optional) { + LOG(" defer finishing pattern %u\n", state->pattern_index); + } else { + LOG(" finish pattern %u\n", state->pattern_index); + state->id = self->next_state_id++; + array_push(&self->finished_states, *state); + array_erase(&self->states, i); + i--; + } } } From 3ad71625dd00af29db566c70bccefaaea1466d3c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 May 2020 14:22:15 -0700 Subject: [PATCH 013/282] Fix query bugs, expand and clean up query tests --- cli/src/tests/query_test.rs | 373 +++++++++++++++--------------------- lib/src/query.c | 186 +++++++++++------- 2 files changed, 271 insertions(+), 288 deletions(-) diff --git a/cli/src/tests/query_test.rs 
b/cli/src/tests/query_test.rs index 4293c568a9..d4ebd884ed 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2,8 +2,8 @@ use super::helpers::allocations; use super::helpers::fixtures::get_language; use std::fmt::Write; use tree_sitter::{ - Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch, QueryPredicate, - QueryPredicateArg, QueryProperty, + Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch, + QueryPredicate, QueryPredicateArg, QueryProperty, }; #[test] @@ -163,19 +163,13 @@ fn test_query_matches_with_simple_pattern() { ) .unwrap(); - let source = "function one() { two(); function three() {} }"; - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "function one() { two(); function three() {} }", &[ (0, vec![("fn-name", "one")]), - (0, vec![("fn-name", "three")]) + (0, vec![("fn-name", "three")]), ], ); }); @@ -195,7 +189,10 @@ fn test_query_matches_with_multiple_on_same_root() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " class Person { // the constructor constructor(name) { this.name = name; } @@ -203,30 +200,21 @@ fn test_query_matches_with_multiple_on_same_root() { // the getter getFullName() { return this.name; } } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ ( 0, vec![ ("the-class-name", "Person"), - ("the-method-name", "constructor") - ] + ("the-method-name", "constructor"), + ], 
), ( 0, vec![ ("the-class-name", "Person"), - ("the-method-name", "getFullName") - ] + ("the-method-name", "getFullName"), + ], ), ], ); @@ -246,20 +234,14 @@ fn test_query_matches_with_multiple_patterns_different_roots() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " function f1() { f2(f3()); } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (0, vec![("fn-def", "f1")]), (1, vec![("fn-ref", "f2")]), @@ -287,21 +269,15 @@ fn test_query_matches_with_multiple_patterns_same_root() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " a = { b: () => { return c; }, d: function() { return d; } }; - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (1, vec![("method-def", "b")]), (0, vec![("method-def", "d")]), @@ -325,20 +301,14 @@ fn test_query_matches_with_nesting_and_no_fields() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " [[a]]; [[c, d], [e, f, g, h]]; [[h], [i]]; - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (0, vec![("x1", "c"), ("x2", "d")]), (0, vec![("x1", "e"), ("x2", "f")]), @@ -358,17 +328,11 @@ fn test_query_matches_with_many() { let language = 
get_language("javascript"); let query = Query::new(language, "(array (identifier) @element)").unwrap(); - let source = "[hello];\n".repeat(50); - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); - - assert_eq!( - collect_matches(matches, &query, source.as_str()), - vec![(0, vec![("element", "hello")]); 50], + assert_query_matches( + language, + &query, + &"[hello];\n".repeat(50), + &vec![(0, vec![("element", "hello")]); 50], ); }); } @@ -385,20 +349,11 @@ fn test_query_matches_capturing_error_nodes() { ) .unwrap(); - let source = "function a(b,, c, d :e:) {}"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), - &[( - 0, - vec![("the-error", ":e:"), ("the-error-identifier", "e"),] - ),] + assert_query_matches( + language, + &query, + "function a(b,, c, d :e:) {}", + &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], ); }); } @@ -439,10 +394,6 @@ fn test_query_matches_with_named_wildcard() { fn test_query_matches_with_wildcard_at_the_root() { allocations::record(|| { let language = get_language("javascript"); - let mut cursor = QueryCursor::new(); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let query = Query::new( language, " @@ -455,13 +406,11 @@ fn test_query_matches_with_wildcard_at_the_root() { ) .unwrap(); - let source = "/* one */ var x; /* two */ function y() {} /* three */ class Z {}"; - - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, 
source), - &[(0, vec![("doc", "/* two */"), ("name", "y")]),] + assert_query_matches( + language, + &query, + "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", + &[(0, vec![("doc", "/* two */"), ("name", "y")])], ); let query = Query::new( @@ -475,17 +424,15 @@ fn test_query_matches_with_wildcard_at_the_root() { ) .unwrap(); - let source = "['hi', x(true), {y: false}]"; - - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "['hi', x(true), {y: false}]", &[ (0, vec![("a", "'hi'")]), (2, vec![("c", "true")]), (3, vec![("d", "false")]), - ] + ], ); }); } @@ -519,16 +466,10 @@ fn test_query_matches_with_immediate_siblings() { ) .unwrap(); - let source = "import a.b.c.d; return [w, [1, y], z]"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "import a.b.c.d; return [w, [1, y], z]", &[ (0, vec![("parent", "a"), ("child", "b")]), (0, vec![("parent", "b"), ("child", "c")]), @@ -536,7 +477,7 @@ fn test_query_matches_with_immediate_siblings() { (0, vec![("parent", "c"), ("child", "d")]), (2, vec![("first-element", "w")]), (2, vec![("first-element", "1")]), - ] + ], ); }); } @@ -564,7 +505,10 @@ fn test_query_matches_with_repeated_leaf_nodes() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " // one // two a(); @@ -582,16 +526,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { // eight function d() {} } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let 
mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ ( 0, @@ -599,11 +534,31 @@ fn test_query_matches_with_repeated_leaf_nodes() { ("doc", "// four"), ("doc", "// five"), ("doc", "// six"), - ("name", "B") - ] + ("name", "B"), + ], ), (1, vec![("doc", "// eight"), ("name", "d")]), - ] + ], + ); + }); +} + +#[test] +fn test_query_matches_with_optional_nodes_inside_of_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new(language, r#"(array (","? (number) @num)+)"#).unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = [1, 2, 3, 4] + "#, + &[( + 0, + vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], + )], ); }); } @@ -625,43 +580,37 @@ fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { ) .unwrap(); - let source = " - function a() { - // one - var b; + assert_query_matches( + language, + &query, + " + function a() { + // one + var b; - function c() {} + function c() {} - // two - // three - var d; + // two + // three + var d; - // four - // five - function e() { + // four + // five + function e() { + } } - } - - // six - "; - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + // six + ", &[ (0, vec![("name", "a")]), (0, vec![("name", "c")]), ( 0, - vec![("doc", "// four"), ("doc", "// five"), ("name", "e")] + vec![("doc", "// four"), ("doc", "// five"), ("name", "e")], ), - ] + ], ); }); } @@ -682,37 +631,21 @@ fn test_query_matches_with_optional_nodes() { ) .unwrap(); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + assert_query_matches(language, &query, 
"class A {}", &[(0, vec![("class", "A")])]); - let source = " - class A {} - "; - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), - &[(0, vec![("class", "A")]),] - ); - - let source = " + assert_query_matches( + language, + &query, + " class A {} class B extends C {} class D extends (E.F) {} - "; - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (0, vec![("class", "A")]), (0, vec![("class", "B"), ("superclass", "C")]), (0, vec![("class", "D")]), - ] + ], ); }); } @@ -721,10 +654,6 @@ fn test_query_matches_with_optional_nodes() { fn test_query_matches_with_repeated_internal_nodes() { allocations::record(|| { let language = get_language("javascript"); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let mut cursor = QueryCursor::new(); - let query = Query::new( language, " @@ -735,18 +664,18 @@ fn test_query_matches_with_repeated_internal_nodes() { ", ) .unwrap(); - let source = " + + assert_query_matches( + language, + &query, + " class A { @c @d e() {} } - "; - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, source), - &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")]),] + ", + &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], ); }) } @@ -760,20 +689,16 @@ fn test_query_matches_in_language_with_simple_aliases() { // tag names, script tag names, and style tag names. All of // these tokens are aliased to `tag_name`. 
let query = Query::new(language, "(tag_name) @tag").unwrap(); - let source = " + + assert_query_matches( + language, + &query, + "
-
"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); - - assert_eq!( - collect_matches(matches, &query, source), + + ", &[ (0, vec![("tag", "div")]), (0, vec![("tag", "script")]), @@ -789,6 +714,8 @@ fn test_query_matches_in_language_with_simple_aliases() { #[test] fn test_query_matches_with_different_tokens_with_the_same_string_value() { allocations::record(|| { + // In Rust, there are two '<' tokens: one for the binary operator, + // and one with higher precedence for generics. let language = get_language("rust"); let query = Query::new( language, @@ -799,24 +726,16 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { ) .unwrap(); - // In Rust, there are two '<' tokens: one for the binary operator, - // and one with higher precedence for generics. - let source = "const A: B = d < e || f > g;"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "const A: B = d < e || f > g;", &[ (0, vec![("less", "<")]), (1, vec![("greater", ">")]), (0, vec![("less", "<")]), (1, vec![("greater", ">")]), - ] + ], ); }); } @@ -866,20 +785,14 @@ fn test_query_matches_with_anonymous_tokens() { ) .unwrap(); - let source = "foo(a && b);"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + 
language, + &query, + "foo(a && b);", &[ (1, vec![("operator", "&&")]), (0, vec![("punctuation", ";")]), - ] + ], ); }); } @@ -1772,6 +1685,20 @@ fn test_query_disable_pattern() { }); } +fn assert_query_matches( + language: Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(collect_matches(matches, &query, source), expected); +} + fn collect_matches<'a>( matches: impl Iterator>, query: &'a Query, diff --git a/lib/src/query.c b/lib/src/query.c index f33b646dca..29c9e83747 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -35,7 +35,8 @@ typedef struct { * captured in this pattern. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. - * - `alternative_index` - TODO doc + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. */ typedef struct { TSSymbol symbol; @@ -87,7 +88,25 @@ typedef struct { * QueryState - The state of an in-progress match of a particular pattern * in a query. While executing, a `TSQueryCursor` must keep track of a number * of possible in-progress matches. Each of those possible matches is - * represented as one of these states. + * represented as one of these states. Fields: + * - `id` - A numeric id that is exposed to the public API. This allows the + * caller to remove a given match, preventing any more of its captures + * from being returned. + * - `start_depth` - The depth in the tree where the first step of the state's + * pattern was matched. + * - `pattern_index` - The pattern that the state is matching. + * - `consumed_capture_count` - The number of captures from this match that + * have already been returned. 
+ * - `capture_list_id` - A numeric id that can be used to retrieve the state's + * list of captures from the `CaptureListPool`. + * - `seeking_immediate_match` - A flag that indicates that the state's next + * step must be matched by the very next sibling. This is used when + * processing repetitions. + * - `skipped_trailing_optional` - A flag that indicates that there is an + * optional node at the end of this state's pattern, and this state did + * *not* match that node. In order to obey the 'longest-match' rule, this + * match should not be returned until it is clear that there can be no + * longer match. */ typedef struct { uint32_t id; @@ -689,7 +708,7 @@ static TSQueryError ts_query__parse_pattern( stream_advance(stream); stream_skip_whitespace(stream); - // Parse a nested list, which represents a pattern followed by + // At the top-level, a nested list represents one root pattern followed by // zero-or-more predicates. if (stream->next == '(' && depth == 0) { TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate); @@ -709,65 +728,94 @@ static TSQueryError ts_query__parse_pattern( } } - TSSymbol symbol; - - // Parse the wildcard symbol - if (stream->next == '*') { - symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; - stream_advance(stream); - } + // When nested inside of a larger pattern, a nested list just represents + // multiple sibling nodes which are grouped, possibly so that a postfix + // operator can be applied to the group. 
+ else if (depth > 0 && (stream->next == '(' || stream->next == '"' )) { + bool child_is_immediate = false; + for (;;) { + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + capture_count, + child_is_immediate + ); + if (e == PARENT_DONE) { + stream_advance(stream); + break; + } else if (e) { + return e; + } - // Parse a normal node name - else if (stream_is_ident_start(stream)) { - const char *node_name = stream->input; - stream_scan_identifier(stream); - uint32_t length = stream->input - node_name; - symbol = ts_language_symbol_for_name( - self->language, - node_name, - length, - true - ); - if (!symbol) { - stream_reset(stream, node_name); - return TSQueryErrorNodeType; + child_is_immediate = false; } } else { - return TSQueryErrorSyntax; - } + TSSymbol symbol; - // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - - // Parse the child patterns - stream_skip_whitespace(stream); - bool child_is_immediate = false; - uint16_t child_start_step_index = self->steps.size; - for (;;) { - if (stream->next == '.') { - child_is_immediate = true; + // Parse the wildcard symbol + if (stream->next == '*') { + symbol = depth > 0 ? 
NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; stream_advance(stream); - stream_skip_whitespace(stream); } - TSQueryError e = ts_query__parse_pattern( - self, - stream, - depth + 1, - capture_count, - child_is_immediate - ); - if (e == PARENT_DONE) { - if (child_is_immediate) { - self->steps.contents[child_start_step_index].is_last = true; + // Parse a normal node name + else if (stream_is_ident_start(stream)) { + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - node_name; + symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; } - stream_advance(stream); - break; - } else if (e) { - return e; + } else { + return TSQueryErrorSyntax; } - child_is_immediate = false; + // Add a step for the node. + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + + // Parse the child patterns + stream_skip_whitespace(stream); + bool child_is_immediate = false; + uint16_t child_start_step_index = self->steps.size; + for (;;) { + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth + 1, + capture_count, + child_is_immediate + ); + if (e == PARENT_DONE) { + if (child_is_immediate) { + self->steps.contents[child_start_step_index].is_last = true; + } + stream_advance(stream); + break; + } else if (e) { + return e; + } + + child_is_immediate = false; + } } } @@ -1577,6 +1625,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // an interative process. 
unsigned start_index = state - self->states.contents; unsigned end_index = start_index + 1; + bool is_alternative = false; for (unsigned j = start_index; j < end_index; j++) { QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; @@ -1600,14 +1649,19 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { copy->step_index ); } - } else if (next_step->depth == PATTERN_DONE_MARKER && j > start_index) { - state->skipped_trailing_optional = true; } + + if ( + (next_step->alternative_index != NONE || is_alternative) && + next_step->depth == PATTERN_DONE_MARKER + ) state->skipped_trailing_optional = true; + is_alternative = true; } } for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; + bool did_remove = false; // Enfore the longest-match criteria. When a query pattern contains optional or // repeated nodes, this is necesssary to avoid multiple redundant states, where @@ -1636,7 +1690,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if (right_contains_left) { capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); array_erase(&self->states, i); - i--; + did_remove = true; j--; break; } else if (left_contains_right) { @@ -1650,16 +1704,18 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If there the state is at the end of its pattern, remove it from the list // of in-progress states and add it to the list of finished states. 
- QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->skipped_trailing_optional) { - LOG(" defer finishing pattern %u\n", state->pattern_index); - } else { - LOG(" finish pattern %u\n", state->pattern_index); - state->id = self->next_state_id++; - array_push(&self->finished_states, *state); - array_erase(&self->states, i); - i--; + if (!did_remove) { + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (next_step->depth == PATTERN_DONE_MARKER) { + if (state->skipped_trailing_optional) { + LOG(" defer finishing pattern %u\n", state->pattern_index); + } else { + LOG(" finish pattern %u\n", state->pattern_index); + state->id = self->next_state_id++; + array_push(&self->finished_states, *state); + array_erase(&self->states, state - self->states.contents); + i--; + } } } } From b47c170c75cf6037818b52503dd106427d0146f5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 May 2020 12:10:01 -0700 Subject: [PATCH 014/282] Query: fix bugs and add tests for top-level and nested repetitions --- cli/src/tests/query_test.rs | 68 ++++++++++++++++++++++++++++++++++++- lib/src/query.c | 67 ++++++++++++++++++++---------------- 2 files changed, 104 insertions(+), 31 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index d4ebd884ed..13fd1dca26 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -563,6 +563,72 @@ fn test_query_matches_with_optional_nodes_inside_of_repetitions() { }); } +#[test] +fn test_query_matches_with_top_level_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment)+ @doc + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + // a + // b + // c + + d() + + // e + "#, + &[ + (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), + (0, vec![("doc", "// e")]), + ], + ); + }); +} + 
+#[test] +fn test_query_matches_with_nested_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declaration + (","? (variable_declarator name: (identifier) @x))+)+ + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = b, c, d + var e, f + + // more + var g + "#, + &[ + ( + 0, + vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")], + ), + (0, vec![("x", "g")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { allocations::record(|| { @@ -616,7 +682,7 @@ fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { } #[test] -fn test_query_matches_with_optional_nodes() { +fn test_query_matches_with_trailing_optional_nodes() { allocations::record(|| { let language = get_language("javascript"); diff --git a/lib/src/query.c b/lib/src/query.c index 29c9e83747..6d08f68bb0 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -102,11 +102,11 @@ typedef struct { * - `seeking_immediate_match` - A flag that indicates that the state's next * step must be matched by the very next sibling. This is used when * processing repetitions. - * - `skipped_trailing_optional` - A flag that indicates that there is an - * optional node at the end of this state's pattern, and this state did - * *not* match that node. In order to obey the 'longest-match' rule, this - * match should not be returned until it is clear that there can be no - * longer match. + * - `has_in_progress_alternatives` - A flag that indicates that there is are + * other states that have the same captures as this state, but are at + * different steps in their pattern. This means that in order to obey the + * 'longest-match' rule, this state should not be returned as a match until + * it is clear that there can be no longer match. 
*/ typedef struct { uint32_t id; @@ -116,7 +116,7 @@ typedef struct { uint16_t consumed_capture_count; uint8_t capture_list_id; bool seeking_immediate_match: 1; - bool skipped_trailing_optional: 1; + bool has_in_progress_alternatives: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -1416,11 +1416,12 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If a state completed its pattern inside of this node, but was deferred from finishing // in order to search for longer matches, mark it as finished. if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth == self->depth) { + if (state->start_depth > self->depth) { LOG(" finish pattern %u\n", state->pattern_index); state->id = self->next_state_id++; array_push(&self->finished_states, *state); deleted_count++; + continue; } } @@ -1437,7 +1438,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->capture_list_id ); deleted_count++; - } else if (deleted_count > 0) { + continue; + } + + if (deleted_count > 0) { self->states.contents[i - deleted_count] = *state; } } @@ -1526,6 +1530,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; QueryStep *step = &self->query->steps.contents[state->step_index]; + state->has_in_progress_alternatives = false; // Check that the node matches all of the criteria for the next // step of the pattern. @@ -1625,7 +1630,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // an interative process. 
unsigned start_index = state - self->states.contents; unsigned end_index = start_index + 1; - bool is_alternative = false; for (unsigned j = start_index; j < end_index; j++) { QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; @@ -1650,12 +1654,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ); } } - - if ( - (next_step->alternative_index != NONE || is_alternative) && - next_step->depth == PATTERN_DONE_MARKER - ) state->skipped_trailing_optional = true; - is_alternative = true; } } @@ -1670,8 +1668,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { QueryState *other_state = &self->states.contents[j]; if ( state->pattern_index == other_state->pattern_index && - state->start_depth == other_state->start_depth && - state->step_index == other_state->step_index + state->start_depth == other_state->start_depth ) { bool left_contains_right, right_contains_left; ts_query_cursor__compare_captures( @@ -1681,23 +1678,33 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { &left_contains_right, &right_contains_left ); - if (left_contains_right || right_contains_left) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - if (right_contains_left) { + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, j); + j--; + continue; + } + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. 
pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); array_erase(&self->states, i); did_remove = true; - j--; break; - } else if (left_contains_right) { - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, j); - j--; } + state->has_in_progress_alternatives = true; } } } @@ -1707,7 +1714,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if (!did_remove) { QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->depth == PATTERN_DONE_MARKER) { - if (state->skipped_trailing_optional) { + if (state->has_in_progress_alternatives) { LOG(" defer finishing pattern %u\n", state->pattern_index); } else { LOG(" finish pattern %u\n", state->pattern_index); From b0671aea6a35adbecd644f5dd288a0f025b04533 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 May 2020 12:13:21 -0700 Subject: [PATCH 015/282] Reorder some code in ts_query_cursor__advance --- lib/src/query.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 6d08f68bb0..47a5ed8c1c 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1456,21 +1456,8 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { return self->finished_states.size > 0; } } else { - bool can_have_later_siblings; - bool can_have_later_siblings_with_this_field; - TSFieldId field_id = ts_tree_cursor_current_status( - &self->cursor, - &can_have_later_siblings, - &can_have_later_siblings_with_this_field - ); - TSNode node = ts_tree_cursor_current_node(&self->cursor); - TSSymbol symbol = ts_node_symbol(node); - bool is_named = ts_node_is_named(node); - if (symbol != ts_builtin_sym_error && self->query->symbol_map) { - symbol = self->query->symbol_map[symbol]; - } - // If this node is before the selected range, then avoid descending 
into it. + TSNode node = ts_tree_cursor_current_node(&self->cursor); if ( ts_node_end_byte(node) <= self->start_byte || point_lte(ts_node_end_point(node), self->start_point) @@ -1487,6 +1474,19 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { point_lte(self->end_point, ts_node_start_point(node)) ) return false; + // Get the properties of the current node. + TSSymbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); + if (symbol != ts_builtin_sym_error && self->query->symbol_map) { + symbol = self->query->symbol_map[symbol]; + } + bool can_have_later_siblings; + bool can_have_later_siblings_with_this_field; + TSFieldId field_id = ts_tree_cursor_current_status( + &self->cursor, + &can_have_later_siblings, + &can_have_later_siblings_with_this_field + ); LOG( "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", ts_node_type(node), From 9c0535cea6a9332fe7a0b0ad0bdab235b9c28a76 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 May 2020 14:15:25 -0700 Subject: [PATCH 016/282] Fix logic for aborting failed matches --- cli/src/tests/query_test.rs | 27 +++++++++++++++++++++++++++ lib/src/query.c | 21 +++++++++++++-------- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 13fd1dca26..a82122f879 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -595,6 +595,33 @@ fn test_query_matches_with_top_level_repetitions() { }); } +#[test] +fn test_query_matches_with_non_terminal_repetitions_within_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (* + (expression_statement + (identifier) @id)+) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a; + b; + c; + "#, + &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + ); + }); +} + #[test] fn test_query_matches_with_nested_repetitions() { 
allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 47a5ed8c1c..801b98e21c 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1407,7 +1407,17 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if (self->ascending) { LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); - // When leaving a node, remove any states that cannot make further progress. + // Leave this node by stepping to its next sibling or to its parent. + bool did_move = true; + if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { + self->ascending = false; + } else if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; + } else { + did_move = false; + } + + // After leaving a node, remove any states that cannot make further progress. uint32_t deleted_count = 0; for (unsigned i = 0, n = self->states.size; i < n; i++) { QueryState *state = &self->states.contents[i]; @@ -1416,7 +1426,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If a state completed its pattern inside of this node, but was deferred from finishing // in order to search for longer matches, mark it as finished. if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth > self->depth) { + if (state->start_depth > self->depth || !did_move) { LOG(" finish pattern %u\n", state->pattern_index); state->id = self->next_state_id++; array_push(&self->finished_states, *state); @@ -1447,12 +1457,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } self->states.size -= deleted_count; - // Leave this node by stepping to its next sibling or to its parent. 
- if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { - self->ascending = false; - } else if (ts_tree_cursor_goto_parent(&self->cursor)) { - self->depth--; - } else { + if (!did_move) { return self->finished_states.size > 0; } } else { From b14f564550c7db684598b10d8365c43671c6bab1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 May 2020 10:42:48 -0700 Subject: [PATCH 017/282] Add a test for nested optionals in queries --- cli/src/tests/query_test.rs | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index a82122f879..0d078411b5 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -743,6 +743,48 @@ fn test_query_matches_with_trailing_optional_nodes() { }); } +#[test] +fn test_query_matches_with_nested_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + // A function call, optionally containing a function call, which optionally contains a number + let query = Query::new( + language, + " + (call_expression + function: (identifier) @outer-fn + arguments: (arguments + (call_expression + function: (identifier) @inner-fn + arguments: (arguments + (number)? 
@num))?)) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a(b, c(), d(null, 1, 2)) + e() + f(g()) + "#, + &[ + (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), + (0, vec![("outer-fn", "c")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]), + (0, vec![("outer-fn", "d")]), + (0, vec![("outer-fn", "e")]), + (0, vec![("outer-fn", "f"), ("inner-fn", "g")]), + (0, vec![("outer-fn", "g")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_repeated_internal_nodes() { allocations::record(|| { From 40262483a91e3031c50f0cd65564a78b63a4d7c1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 May 2020 12:35:51 -0700 Subject: [PATCH 018/282] Change query syntax for predicates Signed-off-by: Patrick Thomson --- cli/src/tests/query_test.rs | 60 +++++++++++++++++++++++-------------- cli/src/tests/tags_test.rs | 56 ++++++++++++++++++++-------------- lib/binding_rust/lib.rs | 10 +++---- lib/src/query.c | 58 ++++++++++++++++++----------------- 4 files changed, 107 insertions(+), 77 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 0d078411b5..945b3b1f06 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -82,18 +82,29 @@ fn test_query_errors_on_invalid_syntax() { 1, [ "((identifier) ()", // - " ^", + " ^", ] .join("\n") )) ); assert_eq!( - Query::new(language, r#"((identifier) @x (eq? @x a"#), + Query::new(language, r#"((identifier) (#a)"#), Err(QueryError::Syntax( 1, [ - r#"((identifier) @x (eq? @x a"#, - r#" ^"#, + "((identifier) (#a)", // + " ^", + ] + .join("\n") + )) + ); + assert_eq!( + Query::new(language, r#"((identifier) @x (#eq? @x a"#), + Err(QueryError::Syntax( + 1, + [ + r#"((identifier) @x (#eq? 
@x a"#, + r#" ^"#, ] .join("\n") )) @@ -136,18 +147,23 @@ fn test_query_errors_on_invalid_conditions() { assert_eq!( Query::new(language, "((identifier) @id (@id))"), - Err(QueryError::Predicate( - "Expected predicate to start with a function name. Got @id.".to_string() + Err(QueryError::Syntax( + 1, + [ + "((identifier) @id (@id))", // + " ^" + ] + .join("\n") )) ); assert_eq!( - Query::new(language, "((identifier) @id (eq? @id))"), + Query::new(language, "((identifier) @id (#eq? @id))"), Err(QueryError::Predicate( - "Wrong number of arguments to eq? predicate. Expected 2, got 1.".to_string() + "Wrong number of arguments to #eq? predicate. Expected 2, got 1.".to_string() )) ); assert_eq!( - Query::new(language, "((identifier) @id (eq? @id @ok))"), + Query::new(language, "((identifier) @id (#eq? @id @ok))"), Err(QueryError::Capture(1, "ok".to_string())) ); }); @@ -1158,13 +1174,13 @@ fn test_query_captures_with_text_conditions() { language, r#" ((identifier) @constant - (match? @constant "^[A-Z]{2,}$")) + (#match? @constant "^[A-Z]{2,}$")) ((identifier) @constructor - (match? @constructor "^[A-Z]")) + (#match? @constructor "^[A-Z]")) ((identifier) @function.builtin - (eq? @function.builtin "require")) + (#eq? @function.builtin "require")) (identifier) @variable "#, @@ -1207,13 +1223,13 @@ fn test_query_captures_with_predicates() { language, r#" ((call_expression (identifier) @foo) - (set! name something) - (set! cool) - (something! @foo omg)) + (#set! name something) + (#set! cool) + (#something! @foo omg)) ((property_identifier) @bar - (is? cool) - (is-not? name something))"#, + (#is? cool) + (#is-not? name something))"#, ) .unwrap(); @@ -1259,13 +1275,13 @@ fn test_query_captures_with_quoted_predicate_args() { language, r#" ((call_expression (identifier) @foo) - (set! one "\"something\ngreat\"")) + (#set! one "\"something\ngreat\"")) ((identifier) - (set! two "\\s(\r?\n)*$")) + (#set! two "\\s(\r?\n)*$")) ((function_declaration) - (set! 
three "\"something\ngreat\"")) + (#set! three "\"something\ngreat\"")) "#, ) .unwrap(); @@ -1403,7 +1419,7 @@ fn test_query_captures_with_many_nested_results_with_fields() { consequence: (member_expression object: (identifier) @right) alternative: (null)) - (eq? @left @right)) + (#eq? @left @right)) "#, ) .unwrap(); @@ -1689,7 +1705,7 @@ fn test_query_start_byte_for_pattern() { .trim_start(); let patterns_3 = " - ((identifier) @b (match? @b i)) + ((identifier) @b (#match? @b i)) (function_declaration name: (identifier) @c) (method_definition name: (identifier) @d) " diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 41907a3c55..9bfd1f5631 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -6,46 +6,58 @@ use tree_sitter_tags::c_lib as c; use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" -((function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) +( + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (function_definition name: (identifier) @name) @function -((class_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @class - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) + +( + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @class + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (class_definition name: (identifier) @name) @class + (call function: (identifier) @name) @call "#; const JS_TAG_QUERY: &'static str = r#" -((* +( (comment)+ @doc . (class_declaration - name: (identifier) @name) @class) - (select-adjacent! @doc @class) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @class + (#select-adjacent! @doc @class) + (#strip! 
@doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* +( (comment)+ @doc . (method_definition - name: (property_identifier) @name) @method) - (select-adjacent! @doc @method) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (property_identifier) @name) @method + (#select-adjacent! @doc @method) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* +( (comment)+ @doc . (function_declaration - name: (identifier) @name) @function) - (select-adjacent! @doc @function) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @function + (#select-adjacent! @doc @function) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) (call_expression function: (identifier) @name) @call - "#; +"#; const RUBY_TAG_QUERY: &'static str = r#" (method @@ -55,7 +67,7 @@ const RUBY_TAG_QUERY: &'static str = r#" method: (identifier) @name) @call ((identifier) @name @call - (is-not? local)) + (#is-not? local)) "#; #[test] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index a13d9168a7..c0aba32f01 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1271,13 +1271,13 @@ impl Query { "eq?" | "not-eq?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( - "Wrong number of arguments to eq? predicate. Expected 2, got {}.", + "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { return Err(QueryError::Predicate(format!( - "First argument to eq? predicate must be a capture name. Got literal \"{}\".", + "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } @@ -1301,19 +1301,19 @@ impl Query { "match?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( - "Wrong number of arguments to match? predicate. Expected 2, got {}.", + "Wrong number of arguments to #match? predicate. 
Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { return Err(QueryError::Predicate(format!( - "First argument to match? predicate must be a capture name. Got literal \"{}\".", + "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } if p[2].type_ == type_capture { return Err(QueryError::Predicate(format!( - "Second argument to match? predicate must be a literal. Got capture @{}.", + "Second argument to #match? predicate must be a literal. Got capture @{}.", result.capture_names[p[2].value_id as usize], ))); } diff --git a/lib/src/query.c b/lib/src/query.c index 801b98e21c..49cbb92f07 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -567,8 +567,20 @@ static TSQueryError ts_query__parse_predicate( TSQuery *self, Stream *stream ) { - if (stream->next == ')') return PARENT_DONE; - if (stream->next != '(') return TSQueryErrorSyntax; + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *predicate_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - predicate_name; + uint16_t id = symbol_table_insert_name( + &self->predicate_values, + predicate_name, + length + ); + array_back(&self->predicates_by_pattern)->length++; + array_push(&self->predicate_steps, ((TSQueryPredicateStep) { + .type = TSQueryPredicateStepTypeString, + .value_id = id, + })); stream_advance(stream); stream_skip_whitespace(stream); @@ -703,35 +715,16 @@ static TSQueryError ts_query__parse_pattern( return PARENT_DONE; } - // Parse a parenthesized node expression + // Parse either: + // * A parenthesized sequence of nodes + // * A predicate + // * A named node else if (stream->next == '(') { stream_advance(stream); stream_skip_whitespace(stream); - // At the top-level, a nested list represents one root pattern followed by - // zero-or-more predicates. 
- if (stream->next == '(' && depth == 0) { - TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate); - if (e) return e; - - // Parse the predicates. - stream_skip_whitespace(stream); - for (;;) { - TSQueryError e = ts_query__parse_predicate(self, stream); - if (e == PARENT_DONE) { - stream_advance(stream); - stream_skip_whitespace(stream); - return 0; - } else if (e) { - return e; - } - } - } - - // When nested inside of a larger pattern, a nested list just represents - // multiple sibling nodes which are grouped, possibly so that a postfix - // operator can be applied to the group. - else if (depth > 0 && (stream->next == '(' || stream->next == '"' )) { + // If this parenthesis is followed by a node, then it represents grouping. + if (stream->next == '(' || stream->next == '"') { bool child_is_immediate = false; for (;;) { if (stream->next == '.') { @@ -755,7 +748,16 @@ static TSQueryError ts_query__parse_pattern( child_is_immediate = false; } - } else { + } + + // This parenthesis is the start of a predicate + else if (stream->next == '#') { + stream_advance(stream); + return ts_query__parse_predicate(self, stream); + } + + // Otherwise, this parenthesis is the start of a named node. + else { TSSymbol symbol; // Parse the wildcard symbol From 85c998d5726af8d31338b73c6c9ddace9d4ce330 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 May 2020 13:04:04 -0700 Subject: [PATCH 019/282] Change the wildcard syntax in tree queries 1. Use '_' instead of '*'. 2. 
Add '*' as a postfix operator for zero-or-more repetitions Signed-off-by: Patrick Thomson --- cli/src/tests/query_test.rs | 53 ++++++++++++++++++++----------------- cli/src/tests/tags_test.rs | 9 ++++--- lib/src/query.c | 35 +++++++++++++++--------- 3 files changed, 56 insertions(+), 41 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 945b3b1f06..5c98c959a4 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -381,8 +381,8 @@ fn test_query_matches_with_named_wildcard() { let query = Query::new( language, " - (return_statement (*) @the-return-value) - (binary_expression operator: * @the-operator) + (return_statement (_) @the-return-value) + (binary_expression operator: _ @the-operator) ", ) .unwrap(); @@ -413,7 +413,7 @@ fn test_query_matches_with_wildcard_at_the_root() { let query = Query::new( language, " - (* + (_ (comment) @doc . (function_declaration @@ -432,10 +432,10 @@ fn test_query_matches_with_wildcard_at_the_root() { let query = Query::new( language, " - (* (string) @a) - (* (number) @b) - (* (true) @c) - (* (false) @d) + (_ (string) @a) + (_ (number) @b) + (_ (true) @c) + (_ (false) @d) ", ) .unwrap(); @@ -477,7 +477,7 @@ fn test_query_matches_with_immediate_siblings() { .) (list . - (*) @first-element) + (_) @first-element) ", ) .unwrap(); @@ -506,17 +506,19 @@ fn test_query_matches_with_repeated_leaf_nodes() { let query = Query::new( language, " - (* + ( (comment)+ @doc . (class_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) - (* + ( (comment)+ @doc . 
(function_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) ", ) .unwrap(); @@ -618,7 +620,7 @@ fn test_query_matches_with_non_terminal_repetitions_within_root() { let query = Query::new( language, r#" - (* + (_ (expression_statement (identifier) @id)+) "#, @@ -673,18 +675,19 @@ fn test_query_matches_with_nested_repetitions() { } #[test] -fn test_query_matches_with_leading_optional_repeated_leaf_nodes() { +fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( language, " - (* - (comment)+? @doc + ( + (comment)* @doc . (function_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) ", ) .unwrap(); @@ -808,7 +811,7 @@ fn test_query_matches_with_repeated_internal_nodes() { let query = Query::new( language, " - (* + (_ (method_definition (decorator (identifier) @deco)+ name: (property_identifier) @name)) @@ -1099,12 +1102,12 @@ fn test_query_captures_basic() { language, r#" (pair - key: * @method.def + key: _ @method.def (function name: (identifier) @method.alias)) (variable_declarator - name: * @function.def + name: _ @function.def value: (function name: (identifier) @function.alias)) @@ -1352,7 +1355,7 @@ fn test_query_captures_with_many_nested_results_without_fields() { language, r#" (pair - key: * @method-def + key: _ @method-def (arrow_function)) ":" @colon @@ -1612,7 +1615,7 @@ fn test_query_captures_with_matches_removed() { r#" (binary_expression left: (identifier) @left - operator: * @op + operator: _ @op right: (identifier) @right) "#, ) @@ -1735,13 +1738,13 @@ fn test_query_capture_names() { r#" (if_statement condition: (binary_expression - left: * @left-operand + left: _ @left-operand operator: "||" - right: * @right-operand) + right: _ @right-operand) consequence: (statement_block) @body) (while_statement - condition:* @loop-condition) + condition: _ @loop-condition) "#, ) .unwrap(); diff --git 
a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 9bfd1f5631..fad8ebd866 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -33,7 +33,7 @@ const PYTHON_TAG_QUERY: &'static str = r#" const JS_TAG_QUERY: &'static str = r#" ( - (comment)+ @doc . + (comment)* @doc . (class_declaration name: (identifier) @name) @class (#select-adjacent! @doc @class) @@ -41,7 +41,7 @@ const JS_TAG_QUERY: &'static str = r#" ) ( - (comment)+ @doc . + (comment)* @doc . (method_definition name: (property_identifier) @name) @method (#select-adjacent! @doc @method) @@ -49,14 +49,15 @@ const JS_TAG_QUERY: &'static str = r#" ) ( - (comment)+ @doc . + (comment)* @doc . (function_declaration name: (identifier) @name) @function (#select-adjacent! @doc @function) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) -(call_expression function: (identifier) @name) @call +(call_expression + function: (identifier) @name) @call "#; const RUBY_TAG_QUERY: &'static str = r#" diff --git a/lib/src/query.c b/lib/src/query.c index 49cbb92f07..9c5677bf06 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -27,7 +27,7 @@ typedef struct { * represented as a sequence of these steps. Fields: * * - `symbol` - The grammar symbol to match. A zero value represents the - * wildcard symbol, '*'. + * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name * was not specified. * - `capture_id` - An integer representing the name of the capture associated @@ -761,7 +761,7 @@ static TSQueryError ts_query__parse_pattern( TSSymbol symbol; // Parse the wildcard symbol - if (stream->next == '*') { + if (stream->next == '_') { symbol = depth > 0 ? 
NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; stream_advance(stream); } @@ -821,6 +821,15 @@ static TSQueryError ts_query__parse_pattern( } } + // Parse a wildcard pattern + else if (stream->next == '_') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Add a step that matches any kind of node + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); + } + // Parse a double-quoted anonymous leaf node expression else if (stream->next == '"') { stream_advance(stream); @@ -892,15 +901,6 @@ static TSQueryError ts_query__parse_pattern( self->steps.contents[step_index].field = field_id; } - // Parse a wildcard pattern - else if (stream->next == '*') { - stream_advance(stream); - stream_skip_whitespace(stream); - - // Add a step that matches any kind of node - array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); - } - else { return TSQueryErrorSyntax; } @@ -913,18 +913,29 @@ static TSQueryError ts_query__parse_pattern( if (stream->next == '+') { stream_advance(stream); + stream_skip_whitespace(stream); QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); repeat_step.alternative_index = starting_step_index; repeat_step.is_placeholder = true; repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); - stream_skip_whitespace(stream); } else if (stream->next == '?') { stream_advance(stream); + stream_skip_whitespace(stream); step->alternative_index = self->steps.size; + } + + else if (stream->next == '*') { + stream_advance(stream); stream_skip_whitespace(stream); + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); + repeat_step.alternative_index = starting_step_index; + repeat_step.is_placeholder = true; + repeat_step.alternative_is_immediate = true; + array_push(&self->steps, repeat_step); + step->alternative_index = self->steps.size; } // Parse an '@'-prefixed capture pattern From 68f43b5865c13b5a4d654f2913f214c1653bc68b Mon Sep 17 00:00:00 
2001 From: Max Brunsfeld Date: Mon, 11 May 2020 13:23:44 -0700 Subject: [PATCH 020/282] Make query syntax backward-compatible --- lib/src/query.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 9c5677bf06..72bf04a8fc 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -715,15 +715,15 @@ static TSQueryError ts_query__parse_pattern( return PARENT_DONE; } - // Parse either: - // * A parenthesized sequence of nodes + // An open parenthesis can be the start of three possible constructs: + // * A grouped sequence // * A predicate // * A named node else if (stream->next == '(') { stream_advance(stream); stream_skip_whitespace(stream); - // If this parenthesis is followed by a node, then it represents grouping. + // If this parenthesis is followed by a node, then it represents a grouped sequence. if (stream->next == '(' || stream->next == '"') { bool child_is_immediate = false; for (;;) { @@ -750,7 +750,7 @@ static TSQueryError ts_query__parse_pattern( } } - // This parenthesis is the start of a predicate + // A pound character indicates the start of a predicate. else if (stream->next == '#') { stream_advance(stream); return ts_query__parse_predicate(self, stream); @@ -761,7 +761,13 @@ static TSQueryError ts_query__parse_pattern( TSSymbol symbol; // Parse the wildcard symbol - if (stream->next == '_') { + if ( + stream->next == '_' || + + // TODO - remove. + // For temporary backward compatibility, handle parenthesized '*' as a wildcard. + stream->next == '*' + ) { symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; stream_advance(stream); } @@ -771,6 +777,14 @@ static TSQueryError ts_query__parse_pattern( const char *node_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - node_name; + + // TODO - remove. + // For temporary backward compatibility, handle predicates without the leading '#' sign. 
+ if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) { + stream_reset(stream, node_name); + return ts_query__parse_predicate(self, stream); + } + symbol = ts_language_symbol_for_name( self->language, node_name, From f6f96f3503d1cf77d661c8bf51cf8afa968ad3e5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 May 2020 14:24:00 -0700 Subject: [PATCH 021/282] Document the new query syntax --- docs/section-2-using-parsers.md | 198 +++++++++++++++++++------------- 1 file changed, 117 insertions(+), 81 deletions(-) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 406e836444..839cacf3c4 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -24,19 +24,22 @@ script/build-lib Alternatively, you can use the library in a larger project by adding one source file to the project. This source file needs two directories to be in the include path when compiled: **source file:** -* `tree-sitter/lib/src/lib.c` + +- `tree-sitter/lib/src/lib.c` **include directories:** -* `tree-sitter/lib/src` -* `tree-sitter/lib/include` + +- `tree-sitter/lib/src` +- `tree-sitter/lib/include` ### The Basic Objects There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`. -* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. -* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. -* A `TSTree` represents the syntax tree of an entire source code file. 
It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. -* A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. + +- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. +- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. +- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. +- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. ### An Example Program @@ -128,7 +131,7 @@ TSTree *ts_parser_parse_string( ); ``` -You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)). In this case, you can use the more general `ts_parser_parse` function: +You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](). 
In this case, you can use the more general `ts_parser_parse` function: ```c TSTree *ts_parser_parse( @@ -155,7 +158,7 @@ typedef struct { ### Syntax Nodes -Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's *type* is a string that indicates which grammar rule the node represents. +Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's _type_ is a string that indicates which grammar rule the node represents. ```c const char *ts_node_type(TSNode); @@ -178,7 +181,7 @@ TSPoint ts_node_end_point(TSNode); ### Retrieving Nodes -Every tree has a *root node*: +Every tree has a _root node_: ```c TSNode ts_tree_root_node(const TSTree *); @@ -199,7 +202,7 @@ TSNode ts_node_prev_sibling(TSNode); TSNode ts_node_parent(TSNode); ``` -These methods may all return a *null node* to indicate, for example, that a node does not *have* a next sibling. You can check if a node is null: +These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling. You can check if a node is null: ```c bool ts_node_is_null(TSNode); @@ -207,21 +210,15 @@ bool ts_node_is_null(TSNode); ### Named vs Anonymous Nodes -Tree-sitter produces [*concrete* syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [*abstract* syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between *named* and *anonymous* nodes. 
+Tree-sitter produces [_concrete_ syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [_abstract_ syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between _named_ and _anonymous_ nodes. Consider a grammar rule like this: ```js -if_statement: $ => seq( - 'if', - '(', - $._expression, - ')', - $._statement, -) +if_statement: ($) => seq("if", "(", $._expression, ")", $._statement); ``` -A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as *named* nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would *not* be named nodes, because they are represented in the grammar as simple strings. +A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they are represented in the grammar as simple strings. You can check whether any given node is named: @@ -242,7 +239,7 @@ If you use this group of methods, the syntax tree functions much like an abstrac ### Node Field Names -To make syntax nodes easier to analyze, many grammars assign unique *field names* to particular child nodes. 
The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name: +To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name: ```c TSNode ts_node_child_by_field_name( @@ -270,7 +267,7 @@ TSNode ts_node_child_by_field_id(TSNode, TSFieldId); ### Editing -In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must *edit* the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. +In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. ```c typedef struct { @@ -293,13 +290,13 @@ When you edit a syntax tree, the positions of its nodes will change. If you have void ts_node_edit(TSNode *, const TSInputEdit *); ``` -This `ts_node_edit` function is *only* needed in the case where you have retrieved `TSNode` instances *before* editing the tree, and then *after* editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. +This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the tree, and then _after_ editing the tree, you want to continue to use those specific node instances. 
Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. ### Multi-language Documents Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. -Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain *ranges* of a file. +Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. ```c typedef struct { @@ -409,13 +406,13 @@ Tree-sitter supports multi-threaded use cases by making syntax trees very cheap TSTree *ts_tree_copy(const TSTree *); ``` -Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are *not* thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. +Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. ## Other Tree Operations ### Walking Trees with Tree Cursors -You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a *tree cursor*. 
A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. +You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. You can initialize a cursor from any node: @@ -441,19 +438,19 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); ``` -### Pattern Matching with Queries +## Pattern Matching with Queries Many code analysis tasks involve searching for patterns in syntax trees. Tree-sitter provides a small declarative language for expressing these patterns and searching for matches. The language is similar to the format of Tree-sitter's [unit test system](./creating-parsers#command-test). -#### Basics +### Query Syntax -A *query* consists of one or more *patterns*, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: +A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. 
For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes:
 
 ```
 (binary_expression (number_literal) (number_literal))
 ```
 
-Children can also be omitted. For example, this would match any `binary_expression` where at least *one* of child is a `string_literal` node:
+Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ child is a `string_literal` node:
 
 ```
 (binary_expression (string_literal))
 ```
@@ -481,13 +478,13 @@ The parenthesized syntax for writing nodes only applies to [named nodes](#named-
 
 #### Capturing Nodes
 
-When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written *after* the nodes that they refer to, and start with an `@` character.
+When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_ the nodes that they refer to, and start with an `@` character. 
-For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `function-definition` with the identifier: +For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: ``` (assignment_expression - left: (identifier) @function-definition + left: (identifier) @the-function-name right: (function)) ``` @@ -501,29 +498,79 @@ And this pattern would match all method definitions, associating the name `the-m name: (property_identifier) @the-method-name))) ``` +#### Quantification Operators + +You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously to the `+` and `*` operators [in regular expressions](https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts). The `+` operator matches _one or more_ repetitions of a pattern, and the `*` operator matches _zero or more_. + +For example, this pattern would match a sequence of one or more comments: + +``` +(comment)+ +``` + +This pattern would match a class declaration, capturing all of the decorators if any were present: + +``` +(class_declaration + (decorator)* @the-decorator + name: (identifier) @the-name) +``` + +You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: + +``` +(call_expression + function: (identifier) @the-function + arguments: (arguments (string)? @the-string-arg)) +``` + +#### Grouping Sibling Nodes + +You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration: + +``` +( + (comment) + (function_declaration) +) +``` + +Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. 
For example, this pattern would match a comma-separated series of numbers:
+
+```
+(
+  (number)
+  ("," (number))*
+)
+```
+
 #### Predicates
 
-You can also specify other conditions that should restrict the nodes that match a given pattern. You do this by enclosing the pattern in an additional pair of parentheses, and specifying one or more *predicate* S-expressions after your main pattern. Predicate S-expressions must start with a predicate name, and contain either `@`-prefixed capture names or strings.
+You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.
 
 For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`:
 
 ```
-((identifier) @constant
-  (match? @constant "^[A-Z][A-Z_]+"))
+(
+  (identifier) @constant
+  (#match? @constant "^[A-Z][A-Z_]+")
+)
 ```
 
 And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:
 
 ```
-((pair
-  key: (property_identifier) @key-name
-  value: (identifier) @value-name)
-  (eq? @key-name @value-name))
+(
+  (pair
+    key: (property_identifier) @key-name
+    value: (identifier) @value-name)
+  (#eq? @key-name @value-name)
+)
 ```
 
-*Note* - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `eq?` and `match?`.
+_Note_ - Predicates are not handled directly by the Tree-sitter C library. 
They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`. -#### The Query API +### The Query API Create a query by specifying a string containing one or more patterns: @@ -583,7 +630,7 @@ This function will return `false` when there are no more matches. Otherwise, it ## Static Node Types -In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This *node types* file provides structured data about every possible syntax node in a grammar. +In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_ file provides structured data about every possible syntax node in a grammar. You can use this data to generate type declarations in statically-typed programming languages. For example, GitHub's [Semantic](https://github.com/github/semantic) uses these node types files to [generate Haskell data types](https://github.com/github/semantic/tree/master/semantic-ast) for every possible syntax node, which allows for code analysis algorithms to be structurally verified by the Haskell type system. @@ -593,9 +640,8 @@ The node types file contains an array of objects, each of which describes a part Every object in this array has these two entries: -* `"type"` - A string that indicates which grammar rule the node represents. 
This corresponds to the `ts_node_type` function described [above](#syntax-nodes). -* `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. - +- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes). +- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info. Examples: @@ -614,16 +660,16 @@ Together, these two fields constitute a unique identifier for a node type; no tw #### Internal Nodes -Many syntax nodes can have *children*. The node type object describes the possible children that a node can have using the following entries: +Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the following entries: -* `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are *child type* objects, described below. -* `"children"` - Another *child type* object that describes all of the node's possible *named* children *without* fields. +- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below. +- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields. -A *child type* object describes a set of child nodes using the following entries: +A _child type_ object describes a set of child nodes using the following entries: -* `"required"` - A boolean indicating whether there is always *at least one* node in this set. 
-* `"multiple"` - A boolean indicating whether there can be *multiple* nodes in this set. -* `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. +- `"required"` - A boolean indicating whether there is always _at least one_ node in this set. +- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set. +- `"types"`- An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above. Example with fields: @@ -635,31 +681,25 @@ Example with fields: "body": { "multiple": false, "required": true, - "types": [ - {"type": "statement_block", "named": true} - ] + "types": [{ "type": "statement_block", "named": true }] }, "decorator": { "multiple": true, "required": false, - "types": [ - {"type": "decorator", "named": true} - ] + "types": [{ "type": "decorator", "named": true }] }, "name": { "multiple": false, "required": true, "types": [ - {"type": "computed_property_name", "named": true}, - {"type": "property_identifier", "named": true}, + { "type": "computed_property_name", "named": true }, + { "type": "property_identifier", "named": true } ] }, "parameters": { "multiple": false, "required": true, - "types": [ - {"type": "formal_parameters", "named": true} - ] + "types": [{ "type": "formal_parameters", "named": true }] } } } @@ -676,8 +716,8 @@ Example with children: "multiple": true, "required": false, "types": [ - {"type": "_expression", "named": true}, - {"type": "spread_element", "named": true} + { "type": "_expression", "named": true }, + { "type": "spread_element", "named": true } ] } } @@ -685,11 +725,11 @@ Example with children: #### Supertype Nodes -In Tree-sitter grammars, there are usually certain rules that represent abstract *categories* of syntax nodes (e.g. "expression", "type", "declaration"). 
In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. +In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. -Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it *will* show up in the node types file, with the following special entry: +Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry: -* `"subtypes"` - An array of objects that specify the *types* of nodes that this 'supertype' node can wrap. +- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. 
Example: @@ -698,11 +738,11 @@ Example: "type": "_declaration", "named": true, "subtypes": [ - {"type": "class_declaration", "named": true}, - {"type": "function_declaration", "named": true}, - {"type": "generator_function_declaration", "named": true}, - {"type": "lexical_declaration", "named": true}, - {"type": "variable_declaration", "named": true} + { "type": "class_declaration", "named": true }, + { "type": "function_declaration", "named": true }, + { "type": "generator_function_declaration", "named": true }, + { "type": "lexical_declaration", "named": true }, + { "type": "variable_declaration", "named": true } ] } ``` @@ -719,17 +759,13 @@ Example: "declaration": { "multiple": false, "required": false, - "types": [ - {"type": "_declaration", "named": true} - ] + "types": [{ "type": "_declaration", "named": true }] }, "source": { "multiple": false, "required": false, - "types": [ - {"type": "string", "named": true} - ] - }, + "types": [{ "type": "string", "named": true }] + } } } ``` From fce5c50f81f0b070f0ac1faf0f4e23ba19198d3b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 May 2020 15:22:05 -0700 Subject: [PATCH 022/282] Fix wasm query tests --- lib/binding_web/binding.js | 20 +-- lib/binding_web/test/query-test.js | 200 ++++++++++++++--------------- lib/src/query.c | 13 +- 3 files changed, 113 insertions(+), 120 deletions(-) diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index f52f61d55f..feedb37fe5 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -748,10 +748,10 @@ class Language { isPositive = false; case 'eq?': if (steps.length !== 3) throw new Error( - `Wrong number of arguments to \`eq?\` predicate. Expected 2, got ${steps.length - 1}` + `Wrong number of arguments to \`#eq?\` predicate. Expected 2, got ${steps.length - 1}` ); if (steps[1].type !== 'capture') throw new Error( - `First argument of \`eq?\` predicate must be a capture. 
Got "${steps[1].value}"` + `First argument of \`#eq?\` predicate must be a capture. Got "${steps[1].value}"` ); if (steps[2].type === 'capture') { const captureName1 = steps[1].name; @@ -780,13 +780,13 @@ class Language { case 'match?': if (steps.length !== 3) throw new Error( - `Wrong number of arguments to \`match?\` predicate. Expected 2, got ${steps.length - 1}.` + `Wrong number of arguments to \`#match?\` predicate. Expected 2, got ${steps.length - 1}.` ); if (steps[1].type !== 'capture') throw new Error( - `First argument of \`match?\` predicate must be a capture. Got "${steps[1].value}".` + `First argument of \`#match?\` predicate must be a capture. Got "${steps[1].value}".` ); if (steps[2].type !== 'string') throw new Error( - `Second argument of \`match?\` predicate must be a string. Got @${steps[2].value}.` + `Second argument of \`#match?\` predicate must be a string. Got @${steps[2].value}.` ); const captureName = steps[1].name; const regex = new RegExp(steps[2].value); @@ -800,10 +800,10 @@ class Language { case 'set!': if (steps.length < 2 || steps.length > 3) throw new Error( - `Wrong number of arguments to \`set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` + `Wrong number of arguments to \`#set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` ); if (steps.some(s => s.type !== 'string')) throw new Error( - `Arguments to \`set!\` predicate must be a strings.".` + `Arguments to \`#set!\` predicate must be a strings.".` ); if (!setProperties[i]) setProperties[i] = {}; setProperties[i][steps[1].value] = steps[2] ? steps[2].value : null; @@ -812,10 +812,10 @@ class Language { case 'is?': case 'is-not?': if (steps.length < 2 || steps.length > 3) throw new Error( - `Wrong number of arguments to \`${operator}\` predicate. Expected 1 or 2. Got ${steps.length - 1}.` + `Wrong number of arguments to \`#${operator}\` predicate. Expected 1 or 2. 
Got ${steps.length - 1}.` ); if (steps.some(s => s.type !== 'string')) throw new Error( - `Arguments to \`${operator}\` predicate must be a strings.".` + `Arguments to \`#${operator}\` predicate must be a strings.".` ); const properties = operator === 'is?' ? assertedProperties : refutedProperties; if (!properties[i]) properties[i] = {}; @@ -823,7 +823,7 @@ class Language { break; default: - throw new Error(`Unknown query predicate \`${steps[0].value}\``); + throw new Error(`Unknown query predicate \`#${steps[0].value}\``); } steps.length = 0; diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index b5a37ed9c0..8683214a04 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -1,12 +1,10 @@ -const {assert} = require('chai'); +const { assert } = require("chai"); let Parser, JavaScript; describe("Query", () => { let parser, tree, query; - before(async () => - ({Parser, JavaScript} = await require('./helper')) - ); + before(async () => ({ Parser, JavaScript } = await require("./helper"))); beforeEach(() => { parser = new Parser().setLanguage(JavaScript); @@ -18,81 +16,75 @@ describe("Query", () => { if (query) query.delete(); }); - describe('construction', () => { - it('throws an error on invalid patterns', () => { + describe("construction", () => { + it("throws an error on invalid patterns", () => { assert.throws(() => { - JavaScript.query("(function_declaration wat)") - }, "Bad syntax at offset 22: \'wat)\'..."); + JavaScript.query("(function_declaration wat)"); + }, "Bad syntax at offset 22: 'wat)'..."); assert.throws(() => { - JavaScript.query("(non_existent)") + JavaScript.query("(non_existent)"); }, "Bad node name 'non_existent'"); assert.throws(() => { - JavaScript.query("(a)") + JavaScript.query("(a)"); }, "Bad node name 'a'"); assert.throws(() => { - JavaScript.query("(function_declaration non_existent:(identifier))") + JavaScript.query("(function_declaration non_existent:(identifier))"); 
}, "Bad field name 'non_existent'"); }); - it('throws an error on invalid predicates', () => { + it("throws an error on invalid predicates", () => { assert.throws(() => { - JavaScript.query("((identifier) @abc (eq? @ab hi))") + JavaScript.query("((identifier) @abc (#eq? @ab hi))"); }, "Bad capture name @ab"); assert.throws(() => { - JavaScript.query("((identifier) @abc (eq? @ab hi))") + JavaScript.query("((identifier) @abc (#eq? @ab hi))"); }, "Bad capture name @ab"); assert.throws(() => { - JavaScript.query("((identifier) @abc (eq?))") - }, "Wrong number of arguments to `eq?` predicate. Expected 2, got 0"); + JavaScript.query("((identifier) @abc (#eq?))"); + }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 0"); assert.throws(() => { - JavaScript.query("((identifier) @a (eq? @a @a @a))") - }, "Wrong number of arguments to `eq?` predicate. Expected 2, got 3"); + JavaScript.query("((identifier) @a (eq? @a @a @a))"); + }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 3"); assert.throws(() => { - JavaScript.query("((identifier) @a (something-else? @a))") - }, "Unknown query predicate `something-else?`"); + JavaScript.query("((identifier) @a (#something-else? 
@a))"); + }, "Unknown query predicate `#something-else?`"); }); }); - describe('.matches', () => { - it('returns all of the matches for the given query', () => { + describe(".matches", () => { + it("returns all of the matches for the given query", () => { tree = parser.parse("function one() { two(); function three() {} }"); query = JavaScript.query(` - (function_declaration name:(identifier) @fn-def) - (call_expression function:(identifier) @fn-ref) + (function_declaration name: (identifier) @fn-def) + (call_expression function: (identifier) @fn-ref) `); const matches = query.matches(tree.rootNode); - assert.deepEqual( - formatMatches(matches), - [ - {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]}, - {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]}, - {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]}, - ] - ); + assert.deepEqual(formatMatches(matches), [ + { pattern: 0, captures: [{ name: "fn-def", text: "one" }] }, + { pattern: 1, captures: [{ name: "fn-ref", text: "two" }] }, + { pattern: 0, captures: [{ name: "fn-def", text: "three" }] }, + ]); }); - it('can search in a specified ranges', () => { + it("can search in a specified ranges", () => { tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]"); - query = JavaScript.query('(identifier) @element'); + query = JavaScript.query("(identifier) @element"); const matches = query.matches( tree.rootNode, - {row: 1, column: 1}, - {row: 3, column: 1} - ); - assert.deepEqual( - formatMatches(matches), - [ - {pattern: 0, captures: [{name: 'element', text: 'd'}]}, - {pattern: 0, captures: [{name: 'element', text: 'e'}]}, - {pattern: 0, captures: [{name: 'element', text: 'f'}]}, - {pattern: 0, captures: [{name: 'element', text: 'g'}]}, - ] + { row: 1, column: 1 }, + { row: 3, column: 1 } ); + assert.deepEqual(formatMatches(matches), [ + { pattern: 0, captures: [{ name: "element", text: "d" }] }, + { pattern: 0, captures: [{ name: "element", text: "e" }] }, + { pattern: 0, captures: [{ name: "element", 
text: "f" }] }, + { pattern: 0, captures: [{ name: "element", text: "g" }] }, + ]); }); }); - describe('.captures', () => { - it('returns all of the captures for the given query, in order', () => { + describe(".captures", () => { + it("returns all of the captures for the given query, in order", () => { tree = parser.parse(` a({ bc: function de() { @@ -105,12 +97,12 @@ describe("Query", () => { `); query = JavaScript.query(` (pair - key: * @method.def + key: _ @method.def (function name: (identifier) @method.alias)) (variable_declarator - name: * @function.def + name: _ @function.def value: (function name: (identifier) @function.alias)) @@ -119,26 +111,23 @@ describe("Query", () => { `); const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "method.def", text: "bc"}, - {name: "delimiter", text: ":"}, - {name: "method.alias", text: "de"}, - {name: "function.def", text: "fg"}, - {name: "operator", text: "="}, - {name: "function.alias", text: "hi"}, - {name: "method.def", text: "jk"}, - {name: "delimiter", text: ":"}, - {name: "method.alias", text: "lm"}, - {name: "function.def", text: "no"}, - {name: "operator", text: "="}, - {name: "function.alias", text: "pq"}, - ] - ); + assert.deepEqual(formatCaptures(captures), [ + { name: "method.def", text: "bc" }, + { name: "delimiter", text: ":" }, + { name: "method.alias", text: "de" }, + { name: "function.def", text: "fg" }, + { name: "operator", text: "=" }, + { name: "function.alias", text: "hi" }, + { name: "method.def", text: "jk" }, + { name: "delimiter", text: ":" }, + { name: "method.alias", text: "lm" }, + { name: "function.def", text: "no" }, + { name: "operator", text: "=" }, + { name: "function.alias", text: "pq" }, + ]); }); - it('handles conditions that compare the text of capture to literal strings', () => { + it("handles conditions that compare the text of capture to literal strings", () => { tree = parser.parse(` const ab = require('./ab'); new Cd(EF); 
@@ -148,32 +137,29 @@ describe("Query", () => { (identifier) @variable ((identifier) @function.builtin - (eq? @function.builtin "require")) + (#eq? @function.builtin "require")) ((identifier) @constructor - (match? @constructor "^[A-Z]")) + (#match? @constructor "^[A-Z]")) ((identifier) @constant - (match? @constant "^[A-Z]{2,}$")) + (#match? @constant "^[A-Z]{2,}$")) `); const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "variable", text: "ab"}, - {name: "variable", text: "require"}, - {name: "function.builtin", text: "require"}, - {name: "variable", text: "Cd"}, - {name: "constructor", text: "Cd"}, - {name: "variable", text: "EF"}, - {name: "constructor", text: "EF"}, - {name: "constant", text: "EF"}, - ] - ); + assert.deepEqual(formatCaptures(captures), [ + { name: "variable", text: "ab" }, + { name: "variable", text: "require" }, + { name: "function.builtin", text: "require" }, + { name: "variable", text: "Cd" }, + { name: "constructor", text: "Cd" }, + { name: "variable", text: "EF" }, + { name: "constructor", text: "EF" }, + { name: "constant", text: "EF" }, + ]); }); - it('handles conditions that compare the text of capture to each other', () => { + it("handles conditions that compare the text of capture to each other", () => { tree = parser.parse(` ab = abc + 1; def = de + 1; @@ -181,56 +167,60 @@ describe("Query", () => { `); query = JavaScript.query(` - ((assignment_expression + ( + (assignment_expression left: (identifier) @id1 right: (binary_expression left: (identifier) @id2)) - (eq? @id1 @id2)) + (#eq? 
@id1 @id2) + ) `); const captures = query.captures(tree.rootNode); - assert.deepEqual( - formatCaptures(captures), - [ - {name: "id1", text: "ghi"}, - {name: "id2", text: "ghi"}, - ] - ); + assert.deepEqual(formatCaptures(captures), [ + { name: "id1", text: "ghi" }, + { name: "id2", text: "ghi" }, + ]); }); - it('handles patterns with properties', () => { + it("handles patterns with properties", () => { tree = parser.parse(`a(b.c);`); query = JavaScript.query(` ((call_expression (identifier) @func) - (set! foo) - (set! bar baz)) + (#set! foo) + (#set! bar baz)) ((property_identifier) @prop - (is? foo) - (is-not? bar baz)) + (#is? foo) + (#is-not? bar baz)) `); const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - {name: 'func', text: 'a', setProperties: {foo: null, bar: 'baz'}}, - {name: 'prop', text: 'c', assertedProperties: {foo: null}, refutedProperties: {bar: 'baz'}}, + { name: "func", text: "a", setProperties: { foo: null, bar: "baz" } }, + { + name: "prop", + text: "c", + assertedProperties: { foo: null }, + refutedProperties: { bar: "baz" }, + }, ]); }); }); }); function formatMatches(matches) { - return matches.map(({pattern, captures}) => ({ + return matches.map(({ pattern, captures }) => ({ pattern, - captures: formatCaptures(captures) - })) + captures: formatCaptures(captures), + })); } function formatCaptures(captures) { - return captures.map(c => { + return captures.map((c) => { const node = c.node; delete c.node; c.text = node.text; return c; - }) + }); } diff --git a/lib/src/query.c b/lib/src/query.c index 72bf04a8fc..5c06ed0fcb 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -581,10 +581,8 @@ static TSQueryError ts_query__parse_predicate( .type = TSQueryPredicateStepTypeString, .value_id = id, })); - stream_advance(stream); stream_skip_whitespace(stream); - unsigned step_count = 0; for (;;) { if (stream->next == ')') { stream_advance(stream); @@ -689,7 +687,6 @@ static TSQueryError 
ts_query__parse_predicate( return TSQueryErrorSyntax; } - step_count++; stream_skip_whitespace(stream); } @@ -765,7 +762,7 @@ static TSQueryError ts_query__parse_pattern( stream->next == '_' || // TODO - remove. - // For temporary backward compatibility, handle parenthesized '*' as a wildcard. + // For temporary backward compatibility, handle '*' as a wildcard. stream->next == '*' ) { symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; @@ -836,7 +833,13 @@ static TSQueryError ts_query__parse_pattern( } // Parse a wildcard pattern - else if (stream->next == '_') { + else if ( + stream->next == '_' || + + // TODO remove. + // For temporary backward compatibility, handle '*' as a wildcard. + stream->next == '*' + ) { stream_advance(stream); stream_skip_whitespace(stream); From cdc973866f6552aba996b4d876928f8ee1005ca0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 15:42:11 -0700 Subject: [PATCH 023/282] Fix build-wasm command on latest emscripten --- cli/src/generate/render.rs | 2 +- cli/src/wasm.rs | 38 +++++++++++++++----------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 67cd2fbed0..f82f076549 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -759,7 +759,7 @@ impl Generator { && state.terminal_entries.len() == 1 && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() { - add_line!(self, "[{}] = {{-1}},", i,); + add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); } else if state.external_lex_state_id > 0 { add_line!( self, diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index 5ee2cec0a7..659c1cc17c 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -59,7 +59,9 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu // Run `emcc` in a container using the `emscripten-slim` image command.args(&["trzeci/emscripten-slim", "emcc"]); } else { - return Error::err("You must have either 
emcc or docker on your PATH to run this command".to_string()); + return Error::err( + "You must have either emcc or docker on your PATH to run this command".to_string(), + ); } command.args(&[ @@ -81,31 +83,21 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu "src", ]); - // Find source files to pass to emscripten - let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| { - format!("Failed to read source directory {:?}", src_dir) - }))?; - - for entry in src_entries { - let entry = entry?; - let file_name = entry.file_name(); - - // Do not compile the node.js binding file. - if file_name - .to_str() - .map_or(false, |s| s.starts_with("binding")) - { - continue; - } + let parser_c_path = src_dir.join("parser.c"); + let scanner_c_path = src_dir.join("scanner.c"); + let scanner_cc_path = src_dir.join("scanner.cc"); + let scanner_cpp_path = src_dir.join("scanner.cpp"); - // Compile any .c, .cc, or .cpp files - if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) { - if extension == "c" || extension == "cc" || extension == "cpp" { - command.arg(Path::new("src").join(entry.file_name())); - } - } + if scanner_cc_path.exists() { + command.arg("-xc++").arg(&scanner_cc_path); + } else if scanner_cpp_path.exists() { + command.arg("-xc++").arg(&scanner_cpp_path); + } else if scanner_c_path.exists() { + command.arg(&scanner_c_path); } + command.arg(&parser_c_path); + let output = command .output() .map_err(Error::wrap(|| "Failed to run emcc command"))?; From f38f5d1d224c85327f9431a9456c75b6207e8449 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:16:48 -0700 Subject: [PATCH 024/282] Tweak readmes --- CONTRIBUTING.md | 1 + README.md | 15 ++++++---- docs/section-6-contributing.md | 54 +++++++++++++++++----------------- lib/binding_rust/README.md | 11 +++---- 4 files changed, 42 insertions(+), 39 deletions(-) create mode 120000 CONTRIBUTING.md diff --git a/CONTRIBUTING.md 
b/CONTRIBUTING.md new file mode 120000 index 0000000000..4f64371073 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +docs/section-6-contributing.md \ No newline at end of file diff --git a/README.md b/README.md index b6df76e9c1..a4bb7a483d 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,14 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: -* **General** enough to parse any programming language -* **Fast** enough to parse on every keystroke in a text editor -* **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +- **General** enough to parse any programming language +- **Fast** enough to parse on every keystroke in a text editor +- **Robust** enough to provide useful results even in the presence of syntax errors +- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application -[Documentation](https://tree-sitter.github.io/tree-sitter/) +## Links + +- [Documentation](https://tree-sitter.github.io) +- [Rust binding](lib/binding_rust/README.md) +- [WASM binding](lib/binding_web/README.md) +- [Command-line interface](cli/README.md) diff --git a/docs/section-6-contributing.md b/docs/section-6-contributing.md index 7e11dc00c4..1ebe50b340 100644 --- a/docs/section-6-contributing.md +++ b/docs/section-6-contributing.md @@ -18,7 +18,7 @@ To make changes to Tree-sitter, you should have: 1. A C compiler, for compiling the core library and the generated parsers. 2. A [Rust toolchain](https://rustup.rs/), for compiling the Rust bindings, the highlighting library, and the CLI. 3. Node.js and NPM, for generating parsers from `grammar.js` files. -4. 
Either [Docker](https://www.docker.com/) or [Emscripten](https://emscripten.org/), for compiling the library to WASM. +4. Either [Emscripten](https://emscripten.org/) or [Docker](https://www.docker.com/), for compiling the library to WASM. ### Building @@ -86,7 +86,7 @@ You can run the tests under the debugger (either `lldb` or `gdb`) using the `-g` script/test test_does_something -g ``` -Part of the Tree-sitter test suite involves parsing the *corpus* tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular *language*, you can pass the `-l` flag. And if you want to run a particular *example* from the corpus, you can pass the `-e` flag: +Part of the Tree-sitter test suite involves parsing the _corpus_ tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular _language_, you can pass the `-l` flag. And if you want to run a particular _example_ from the corpus, you can pass the `-e` flag: ```sh script/test -l javascript -e Arrays @@ -96,18 +96,18 @@ script/test -l javascript -e Arrays The main [`tree-sitter/tree-sitter`](https://github.com/tree-sitter/tree-sitter) repository contains the source code for several packages that are published to package registries for different languages: -* Rust crates on [crates.io](https://crates.io): - * [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library - * [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library - * [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool -* JavaScript modules on [npmjs.com](https://npmjs.com): - * [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library - * [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The 
command-line tool +- Rust crates on [crates.io](https://crates.io): + - [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library + - [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library + - [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool +- JavaScript modules on [npmjs.com](https://npmjs.com): + - [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library + - [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool There are also several other dependent repositories that contain other published packages: -* [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com -* [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org). +- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com +- [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org). ## Publishing New Releases @@ -115,31 +115,31 @@ Publishing a new release of the CLI requires these steps: 1. Commit and push all outstanding changes and verify that CI passes: - ```sh - git commit -m "Fix things" - git push - ``` + ```sh + git commit -m "Fix things" + git push + ``` 2. 
Create a new tag: - ```sh - script/version patch - ``` + ```sh + script/version patch + ``` - This will determine the current version, increment the *patch* version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command. + This will determine the current version, increment the _patch_ version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command. 3. Push the commit and the tag: - ```sh - git push - git push --tags - ``` + ```sh + git push + git push --tags + ``` 4. Wait for CI to pass. Because of the git tag, the CI jobs will publish artifacts to [a GitHub release](https://github.com/tree-sitter/tree-sitter/releases). The npm module of `tree-sitter-cli` works by downloading the appropriate binary from the corresponding GitHub release during installation. So it's best not to publish the npm package until the binaries are uploaded. 5. 
Publish the npm package: - ```sh - cd cli/npm - npm publish - ``` + ```sh + cd cli/npm + npm publish + ``` diff --git a/lib/binding_rust/README.md b/lib/binding_rust/README.md index 0ee4ba3f97..e85f45f356 100644 --- a/lib/binding_rust/README.md +++ b/lib/binding_rust/README.md @@ -1,5 +1,4 @@ -Rust Tree-sitter -================ +# Rust Tree-sitter [![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) [![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) @@ -14,15 +13,12 @@ First, create a parser: ```rust use tree_sitter::{Parser, Language}; -// ... - let mut parser = Parser::new(); ``` Tree-sitter languages consist of generated C code. To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory): -```rust -extern crate cc; +```rust use std::path::PathBuf; fn main() { @@ -37,12 +33,13 @@ fn main() { ``` Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`: + ```toml [build-dependencies] cc="*" ``` -To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser. +To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser. 
```rust extern "C" { fn tree_sitter_c() -> Language; } From 96bdcfcf575a965db0f79d90fc7d04e6c655bb3a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:18:20 -0700 Subject: [PATCH 025/282] mac CI: Use newer emscripten --- script/fetch-emscripten | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/fetch-emscripten b/script/fetch-emscripten index bbd15a394f..d16c857e04 100755 --- a/script/fetch-emscripten +++ b/script/fetch-emscripten @@ -2,7 +2,7 @@ set -e -EMSCRIPTEN_VERSION=1.39.0 +EMSCRIPTEN_VERSION=1.39.15 mkdir -p target EMSDK_DIR="./target/emsdk" From 392f023e59ee55e421c3b0c610ce726971f4c748 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:21:00 -0700 Subject: [PATCH 026/282] rust: 0.16.0 --- lib/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 788294205a..353ec8cec4 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.6.3" +version = "0.16.0" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" From d7a188ce7fff5e9820f7d4cbeb2eead547d02a7b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:22:11 -0700 Subject: [PATCH 027/282] web: 0.16.3 --- lib/binding_web/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 4323f33cdd..043394e4a6 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.16.2", + "version": "0.16.3", "description": "Tree-sitter bindings for the web", "main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", From 862b56dfe151351ac89aaf5502a5b605fa03cf7e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:22:44 -0700 Subject: [PATCH 028/282] 0.16.6 --- Cargo.lock 
| 10 +++++----- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f539e1816e..710e822d23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.6.3" +version = "0.16.0" dependencies = [ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.5" +version = "0.16.6" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", @@ -762,7 +762,7 @@ dependencies = [ "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", + "tree-sitter 0.16.0", "tree-sitter-highlight 0.1.6", "tree-sitter-tags 0.1.6", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -773,7 +773,7 @@ name = "tree-sitter-highlight" version = "0.1.6" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", + "tree-sitter 0.16.0", ] [[package]] @@ -782,7 +782,7 @@ version = "0.1.6" dependencies = [ "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", + "tree-sitter 0.16.0", ] [[package]] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 27706945a2..6904c42a95 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.5" +version = "0.16.6" authors = ["Max Brunsfeld "] 
edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index ad46e20d70..8965d02053 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.5", + "version": "0.16.6", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 61814b468db91e24605ccd2b106f458d6a5ab90c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:28:26 -0700 Subject: [PATCH 029/282] Remove build-lib script, recommend make --- docs/section-2-using-parsers.md | 8 ++------ script/build-fuzzers | 2 +- script/build-lib | 22 ---------------------- 3 files changed, 3 insertions(+), 29 deletions(-) delete mode 100755 script/build-lib diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 839cacf3c4..5b807b9004 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -15,13 +15,9 @@ All of the API functions shown here are declared and documented in the [`tree_si ### Building the Library -To build the library on a POSIX system, run this script, which will create a static library called `libtree-sitter.a` in the Tree-sitter folder: +To build the library on a POSIX system, just run `make` in the Tree-sitter directory. This will create a static library called `libtree-sitter.a` as well as dynamic libraries. -```sh -script/build-lib -``` - -Alternatively, you can use the library in a larger project by adding one source file to the project. This source file needs two directories to be in the include path when compiled: +Alternatively, you can incorporate the library in a larger project's build system by adding one source file to the build. 
This source file needs two directories to be in the include path when compiled: **source file:** diff --git a/script/build-fuzzers b/script/build-fuzzers index c0675cd679..bff43c8b47 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -21,7 +21,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} export CFLAGS -script/build-lib +make if [ -z "$@" ]; then languages=$(ls test/fixtures/grammars) diff --git a/script/build-lib b/script/build-lib deleted file mode 100755 index 83fa69d8fd..0000000000 --- a/script/build-lib +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# If `CC` isn't set, pick a default compiler -if hash clang 2>/dev/null; then - : ${CC:=clang} -else - : ${CC:=gcc} -fi - -${CC} \ - -c \ - -O3 \ - -std=gnu99 \ - $CFLAGS \ - -I lib/src \ - -I lib/include \ - lib/src/lib.c \ - -o tree-sitter.o - -rm -f libtree-sitter.a -ar rcs libtree-sitter.a tree-sitter.o -rm tree-sitter.o From 1d68896900e42d71f67d41631872094587c6ee83 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:31:02 -0700 Subject: [PATCH 030/282] highlight: 0.2.0 --- highlight/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index 94a4e03290..bb94fb2173 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.1.6" +version = "0.2.0" authors = [ "Max Brunsfeld ", "Tim Clem " From d17d1b36b28b06b177c6c36e7cd71708d16eabac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:32:37 -0700 Subject: [PATCH 031/282] tags: 0.2.0 --- Cargo.lock | 8 ++++---- tags/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 710e822d23..8baba27d75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -763,14 +763,14 @@ dependencies = [ "tempfile 3.0.7 
(registry+https://github.com/rust-lang/crates.io-index)", "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.16.0", - "tree-sitter-highlight 0.1.6", - "tree-sitter-tags 0.1.6", + "tree-sitter-highlight 0.2.0", + "tree-sitter-tags 0.2.0", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.6" +version = "0.2.0" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.16.0", @@ -778,7 +778,7 @@ dependencies = [ [[package]] name = "tree-sitter-tags" -version = "0.1.6" +version = "0.2.0" dependencies = [ "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/tags/Cargo.toml b/tags/Cargo.toml index 43557bb2d7..531b54b4ce 100644 --- a/tags/Cargo.toml +++ b/tags/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-tags" description = "Library for extracting tag information" -version = "0.1.6" +version = "0.2.0" authors = [ "Max Brunsfeld ", "Patrick Thomson " From 2934e219cffdd89d7d471335a717a18304427d99 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 May 2020 16:56:21 -0700 Subject: [PATCH 032/282] Fix build-wasm command --- cli/src/wasm.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index 659c1cc17c..8c123e3b1b 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -83,10 +83,11 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu "src", ]); - let parser_c_path = src_dir.join("parser.c"); - let scanner_c_path = src_dir.join("scanner.c"); - let scanner_cc_path = src_dir.join("scanner.cc"); - let scanner_cpp_path = src_dir.join("scanner.cpp"); + let src = Path::new("src"); + let parser_c_path = src.join("parser.c"); + let scanner_c_path = src.join("scanner.c"); + let scanner_cc_path = 
src.join("scanner.cc"); + let scanner_cpp_path = src.join("scanner.cpp"); if scanner_cc_path.exists() { command.arg("-xc++").arg(&scanner_cc_path); From 40993195b8da0d042161a7b7bb0f6831dcfb4da1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 May 2020 15:14:43 -0700 Subject: [PATCH 033/282] Fix wasm tests on CI (#616) * wasm: Improve error message on missing language symbol * Fix source file existence checks in build-wasm command --- cli/src/wasm.rs | 6 +++--- lib/binding_web/binding.js | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index 8c123e3b1b..47cea90ad2 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -89,11 +89,11 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu let scanner_cc_path = src.join("scanner.cc"); let scanner_cpp_path = src.join("scanner.cpp"); - if scanner_cc_path.exists() { + if language_dir.join(&scanner_cc_path).exists() { command.arg("-xc++").arg(&scanner_cc_path); - } else if scanner_cpp_path.exists() { + } else if language_dir.join(&scanner_cpp_path).exists() { command.arg("-xc++").arg(&scanner_cpp_path); - } else if scanner_c_path.exists() { + } else if language_dir.join(&scanner_c_path).exists() { command.arg(&scanner_c_path); } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index feedb37fe5..eb6d4d8aad 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -872,10 +872,14 @@ class Language { return bytes .then(bytes => loadWebAssemblyModule(bytes, {loadAsync: true})) .then(mod => { - const functionName = Object.keys(mod).find(key => + const symbolNames = Object.keys(mod) + const functionName = symbolNames.find(key => LANGUAGE_FUNCTION_REGEX.test(key) && !key.includes("external_scanner_") ); + if (!functionName) { + console.log(`Couldn't find language function in WASM file. 
Symbols:\n${JSON.stringify(symbolNames, null, 2)}`) + } const languageAddress = mod[functionName](); return new Language(INTERNAL, languageAddress); }); From b66d149b74d843d3993f00e07f618d953711f674 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 May 2020 15:56:49 -0700 Subject: [PATCH 034/282] Fix inconsistent whitespace after '{' in generated parser --- cli/src/generate/render.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f82f076549..d6191d9d79 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1022,7 +1022,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{ .entry = {{.count = {}, .reusable = {}}} }},", + " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", i, entry.actions.len(), entry.reusable From 9d182bb0785f158ba5b6ab14df8fae0eff8aa819 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 May 2020 10:51:18 -0700 Subject: [PATCH 035/282] node-types: Fix bug w/ required property when multiple rules aliased as same --- cli/src/generate/node_types.rs | 211 +++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 92 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index b5bf1515ba..9c3bea6477 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -502,26 +502,34 @@ pub(crate) fn generate_node_types_json( // There may already be an entry with this name, because multiple // rules may be aliased with the same name. 
- let node_type_json = - node_types_json - .entry(kind.clone()) - .or_insert_with(|| NodeInfoJSON { - kind: kind.clone(), - named: is_named, - fields: Some(BTreeMap::new()), - children: None, - subtypes: None, - }); + let mut node_type_existed = true; + let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| { + node_type_existed = false; + NodeInfoJSON { + kind: kind.clone(), + named: is_named, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None, + } + }); let fields_json = node_type_json.fields.as_mut().unwrap(); - for (field, field_info) in info.fields.iter() { - populate_field_info_json( - &mut fields_json - .entry(field.clone()) - .or_insert(FieldInfoJSON::default()), - field_info, - ); + for (new_field, field_info) in info.fields.iter() { + let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { + // If another rule is aliased with the same name, and does *not* have this field, + // then this field cannot be required. + let mut field_json = FieldInfoJSON::default(); + if node_type_existed { + field_json.required = false; + } + field_json + }); + populate_field_info_json(field_json, field_info); } + + // If another rule is aliased with the same name, any fields that aren't present in this + // cannot be required. 
for (existing_field, field_json) in fields_json.iter_mut() { if !info.fields.contains_key(existing_field) { field_json.required = false; @@ -1170,7 +1178,7 @@ mod tests { } #[test] - fn test_node_types_with_named_aliases() { + fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { name: String::new(), extra_symbols: Vec::new(), @@ -1181,98 +1189,117 @@ mod tests { supertype_symbols: vec![], variables: vec![ Variable { - name: "expression".to_string(), - kind: VariableType::Named, - rule: Rule::choice(vec![Rule::named("yield"), Rule::named("argument_list")]), - }, - Variable { - name: "yield".to_string(), - kind: VariableType::Named, - rule: Rule::Seq(vec![Rule::string("YIELD")]), - }, - Variable { - name: "argument_list".to_string(), + name: "script".to_string(), kind: VariableType::Named, rule: Rule::choice(vec![ - Rule::named("x"), - Rule::alias(Rule::named("b"), "expression".to_string(), true), + Rule::named("a"), + // Rule `b` is aliased as rule `a` + Rule::alias(Rule::named("b"), "a".to_string(), true), ]), }, Variable { - name: "b".to_string(), + name: "a".to_string(), kind: VariableType::Named, - rule: Rule::choice(vec![ - Rule::field("f".to_string(), Rule::string("B")), - Rule::named("c"), + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::string("1")), + Rule::field("f2".to_string(), Rule::string("2")), ]), }, Variable { - name: "c".to_string(), - kind: VariableType::Named, - rule: Rule::seq(vec![Rule::string("C")]), - }, - Variable { - name: "x".to_string(), + name: "b".to_string(), kind: VariableType::Named, - rule: Rule::seq(vec![Rule::string("X")]), + rule: Rule::seq(vec![ + Rule::field("f2".to_string(), Rule::string("22")), + Rule::field("f2".to_string(), Rule::string("222")), + Rule::field("f3".to_string(), Rule::string("3")), + ]), }, ], }); assert_eq!( - node_types.iter().map(|n| &n.kind).collect::>(), - &[ - "argument_list", - "c", - "expression", - "x", - "yield", - "B", - "C", - "X", 
- "YIELD" - ] + &node_types + .iter() + .map(|t| t.kind.as_str()) + .collect::>(), + &["a", "script", "1", "2", "22", "222", "3"] ); + assert_eq!( - node_types[2], - NodeInfoJSON { - kind: "expression".to_string(), - named: true, - subtypes: None, - children: Some(FieldInfoJSON { - multiple: false, - required: false, - types: vec![ - NodeTypeJSON { - kind: "argument_list".to_string(), - named: true, - }, - NodeTypeJSON { - kind: "c".to_string(), - named: true, - }, - NodeTypeJSON { - kind: "yield".to_string(), + &node_types[0..2], + &[ + // A combination of the types for `a` and `b`. + NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "1".to_string(), + named: false, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "2".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "22".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "222".to_string(), + named: false, + } + ] + }, + ), + ( + "f3".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "3".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ), + }, + NodeInfoJSON { + kind: "script".to_string(), + named: true, + subtypes: None, + // Only one node + children: Some(FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "a".to_string(), named: true, - }, - ] - }), - fields: Some( - vec![( - "f".to_string(), - FieldInfoJSON { - required: false, - multiple: false, - types: vec![NodeTypeJSON { - named: false, - kind: "B".to_string(), - }] - } - )] - .into_iter() - .collect() - ), - } + }] + }), + fields: Some(BTreeMap::new()), + } + ] ); } From 97018168d39bdfd8478f702b0a9141712f40ddc0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld 
Date: Thu, 14 May 2020 10:52:21 -0700 Subject: [PATCH 036/282] 0.16.7 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8baba27d75..378ee2986a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.6" +version = "0.16.7" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 6904c42a95..76d8ede5af 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.6" +version = "0.16.7" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 8965d02053..c03a57bd6b 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.6", + "version": "0.16.7", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 38d32c018b4195068b071ebe93cc64c02e702e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B5=D0=B9=20=D0=9F=D0=B0?= =?UTF-8?q?=D1=81=D1=82=D1=83=D1=85=D0=BE=D0=B2?= Date: Sat, 16 May 2020 01:02:39 +0200 Subject: [PATCH 037/282] add Rust into languages list for corpus tests (#619) --- cli/src/tests/corpus_test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index fd2ed90828..e201d74310 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -24,6 +24,7 @@ const LANGUAGES: &'static [&'static str] = &[ "json", "php", "python", + "rust", ]; lazy_static! 
{ From 37ee7acc9e2ba7768a2523522cfaeab84600361f Mon Sep 17 00:00:00 2001 From: lerencao Date: Sat, 16 May 2020 14:56:40 +0800 Subject: [PATCH 038/282] [cli]: add an option to no open browser in web-ui command (#620) --- cli/src/main.rs | 8 +++++--- cli/src/web_ui.rs | 13 ++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index c5c0e0e07c..757c70eb6d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -149,7 +149,8 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"), + SubCommand::with_name("web-ui").about("Test a parser interactively in the browser") + .arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")), ) .subcommand( SubCommand::with_name("dump-languages") @@ -318,8 +319,9 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("build-wasm") { let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; - } else if matches.subcommand_matches("web-ui").is_some() { - web_ui::serve(¤t_dir); + } else if let Some(matches) = matches.subcommand_matches("web-ui") { + let open_in_browser = !matches.is_present("quiet"); + web_ui::serve(¤t_dir, open_in_browser); } else if matches.subcommand_matches("dump-languages").is_some() { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { diff --git a/cli/src/web_ui.rs b/cli/src/web_ui.rs index bfde94a145..7d4c7eec23 100644 --- a/cli/src/web_ui.rs +++ b/cli/src/web_ui.rs @@ -43,7 +43,7 @@ resource!(get_playground_js, "docs/assets/js/playground.js"); posix_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); posix_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -pub fn 
serve(grammar_path: &Path) { +pub fn serve(grammar_path: &Path, open_in_browser: bool) { let port = get_available_port().expect("Couldn't find an available port"); let url = format!("127.0.0.1:{}", port); let server = Server::http(&url).expect("Failed to start web server"); @@ -59,12 +59,11 @@ pub fn serve(grammar_path: &Path) { ) })) .unwrap(); - - webbrowser::open(&format!("http://127.0.0.1:{}", port)) - .map_err(Error::wrap(|| { - format!("Failed to open '{}' in a web browser", url) - })) - .unwrap(); + if open_in_browser { + if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) { + eprintln!("Failed to open '{}' in a web browser", url); + } + } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); let main_html = String::from_utf8(get_main_html(&tree_sitter_dir)) From 02196f8ae91fa8ff76fed5b7c8ba0c6fc689c673 Mon Sep 17 00:00:00 2001 From: Kenneth Skovhus Date: Sun, 17 May 2020 20:33:53 +0200 Subject: [PATCH 039/282] Correct SyntaxNode isNamed type --- lib/binding_web/tree-sitter-web.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 7ddae95216..1d6f37610f 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -50,7 +50,6 @@ declare module 'web-tree-sitter' { export interface SyntaxNode { tree: Tree; type: string; - isNamed: boolean; text: string; startPosition: Point; endPosition: Point; @@ -74,6 +73,7 @@ declare module 'web-tree-sitter' { hasError(): boolean; equals(other: SyntaxNode): boolean; isMissing(): boolean; + isNamed(): boolean; toString(): string; child(index: number): SyntaxNode | null; namedChild(index: number): SyntaxNode | null; From 7b39420de37f64c5efc80a6711a86c67dcea38b3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 May 2020 10:48:47 -0700 Subject: [PATCH 040/282] Make it easy to build with address sanitizer in test script --- script/test | 18 
++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/script/test b/script/test index 5fda7cb26d..bcc88e24f4 100755 --- a/script/test +++ b/script/test @@ -37,12 +37,26 @@ export RUST_BACKTRACE=full mode=normal -while getopts "dDghl:e:s:t:" option; do +# Specify a `--target` explicitly. For some reason, this is required for +# address sanitizer support. +toolchain=$(rustup show active-toolchain) +toolchain_regex='(stable|beta|nightly)-([_a-z0-9-]+).*' +if [[ $toolchain =~ $toolchain_regex ]]; then + release=${BASH_REMATCH[1]} + current_target=${BASH_REMATCH[2]} +else + echo "Failed to parse toolchain '${toolchain}'" +fi + +while getopts "adDghl:e:s:t:" option; do case ${option} in h) usage exit ;; + a) + export RUSTFLAGS="-Z sanitizer=address" + ;; l) export TREE_SITTER_TEST_LANGUAGE_FILTER=${OPTARG} ;; @@ -82,5 +96,5 @@ if [[ "${mode}" == "debug" ]]; then ) lldb "${test_binary}" -- $top_level_filter else - cargo test -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture + cargo test --target=${current_target} -p tree-sitter-cli --jobs 1 $top_level_filter -- --nocapture fi From 462c86903f60e047849cd1abf490937a35a40a8a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 May 2020 13:40:24 -0700 Subject: [PATCH 041/282] Improve tree queries' ability to handle large numbers of nested matches (#624) * query: Acquire capture lists lazily, allow more concurrent states * Fix some static analysis warnings --- cli/src/tests/query_test.rs | 43 +++++++- lib/src/query.c | 210 +++++++++++++++++++++--------------- 2 files changed, 165 insertions(+), 88 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5c98c959a4..5dbfea18c0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -339,7 +339,7 @@ fn test_query_matches_with_nesting_and_no_fields() { } #[test] -fn test_query_matches_with_many() { +fn test_query_matches_with_many_results() { allocations::record(|| { let language = 
get_language("javascript"); let query = Query::new(language, "(array (identifier) @element)").unwrap(); @@ -353,6 +353,47 @@ fn test_query_matches_with_many() { }); } +#[test] +fn test_query_matches_with_many_overlapping_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @method)) + (call_expression + function: (identifier) @function) + ((identifier) @constant + (#match? @constant "[A-Z\\d_]+")) + "# + ).unwrap(); + + let count = 80; + + // Deeply nested chained function calls: + // a + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // ... + let mut source = "a".to_string(); + source += &"\n .foo(bar(BAZ))".repeat(count); + + assert_query_matches( + language, + &query, + &source, + &[ + (0, vec![("method", "foo")]), + (1, vec![("function", "bar")]), + (2, vec![("constant", "BAZ")]) + ].iter().cloned().cycle().take(3 * count).collect::>(), + ); + }); +} + #[test] fn test_query_matches_capturing_error_nodes() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 5c06ed0fcb..89b5e4b575 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -8,6 +8,10 @@ #include "./unicode.h" #include +#define MAX_STATE_COUNT 256 +#define MAX_CAPTURE_LIST_COUNT 32 +#define MAX_STEP_CAPTURE_COUNT 3 + /* * Stream - A sequence of unicode characters derived from a UTF8 string. * This struct is used in parsing queries from S-expressions. @@ -19,8 +23,6 @@ typedef struct { uint8_t next_size; } Stream; -#define MAX_STEP_CAPTURE_COUNT 4 - /* * QueryStep - A step in the process of matching a query. Each node within * a query S-expression maps to one of these steps. 
An entire pattern is @@ -43,7 +45,7 @@ typedef struct { TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; uint16_t alternative_index; - uint8_t depth; + uint16_t depth; bool contains_captures: 1; bool is_pattern_start: 1; bool is_immediate: 1; @@ -111,10 +113,10 @@ typedef struct { typedef struct { uint32_t id; uint16_t start_depth; - uint16_t pattern_index; uint16_t step_index; - uint16_t consumed_capture_count; - uint8_t capture_list_id; + uint16_t pattern_index; + uint16_t capture_list_id; + uint16_t consumed_capture_count: 14; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; } QueryState; @@ -128,7 +130,8 @@ typedef Array(TSQueryCapture) CaptureList; * are currently in use. */ typedef struct { - CaptureList list[32]; + CaptureList list[MAX_CAPTURE_LIST_COUNT]; + CaptureList empty_list; uint32_t usage_map; } CaptureListPool; @@ -169,11 +172,10 @@ struct TSQueryCursor { }; static const TSQueryError PARENT_DONE = -1; -static const uint8_t PATTERN_DONE_MARKER = UINT8_MAX; +static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1; -static const uint16_t MAX_STATE_COUNT = 32; // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) 
@@ -259,24 +261,31 @@ static void stream_scan_identifier(Stream *stream) { static CaptureListPool capture_list_pool_new() { return (CaptureListPool) { + .empty_list = array_new(), .usage_map = UINT32_MAX, }; } static void capture_list_pool_reset(CaptureListPool *self) { self->usage_map = UINT32_MAX; - for (unsigned i = 0; i < 32; i++) { + for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { array_clear(&self->list[i]); } } static void capture_list_pool_delete(CaptureListPool *self) { - for (unsigned i = 0; i < 32; i++) { + for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { array_delete(&self->list[i]); } } -static CaptureList *capture_list_pool_get(CaptureListPool *self, uint16_t id) { +static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { + if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list; + return &self->list[id]; +} + +static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { + assert(id < MAX_CAPTURE_LIST_COUNT); return &self->list[id]; } @@ -290,12 +299,14 @@ static uint16_t capture_list_pool_acquire(CaptureListPool *self) { // the leading zeros in the usage map. An id of zero corresponds to the // highest-order bit in the bitmask. 
uint16_t id = count_leading_zeros(self->usage_map); - if (id == 32) return NONE; + if (id >= MAX_CAPTURE_LIST_COUNT) return NONE; self->usage_map &= ~bitmask_for_index(id); + array_clear(&self->list[id]); return id; } static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { + if (id >= MAX_CAPTURE_LIST_COUNT) return; array_clear(&self->list[id]); self->usage_map |= bitmask_for_index(id); } @@ -433,7 +444,7 @@ static QueryStep query_step__new( .symbol = symbol, .depth = depth, .field = 0, - .capture_ids = {NONE, NONE, NONE, NONE}, + .capture_ids = {NONE, NONE, NONE}, .alternative_index = NONE, .contains_captures = false, .is_last = false, @@ -1195,7 +1206,7 @@ TSQueryCursor *ts_query_cursor_new() { .end_point = POINT_MAX, }; array_reserve(&self->states, MAX_STATE_COUNT); - array_reserve(&self->finished_states, MAX_STATE_COUNT); + array_reserve(&self->finished_states, MAX_CAPTURE_LIST_COUNT); return self; } @@ -1257,6 +1268,9 @@ static bool ts_query_cursor__first_in_progress_capture( uint32_t *pattern_index ) { bool result = false; + *state_index = UINT32_MAX; + *byte_offset = UINT32_MAX; + *pattern_index = UINT32_MAX; for (unsigned i = 0; i < self->states.size; i++) { const QueryState *state = &self->states.contents[i]; const CaptureList *captures = capture_list_pool_get( @@ -1268,10 +1282,7 @@ static bool ts_query_cursor__first_in_progress_capture( if ( !result || capture_byte < *byte_offset || - ( - capture_byte == *byte_offset && - state->pattern_index < *pattern_index - ) + (capture_byte == *byte_offset && state->pattern_index < *pattern_index) ) { result = true; *state_index = i; @@ -1306,11 +1317,11 @@ void ts_query_cursor__compare_captures( bool *left_contains_right, bool *right_contains_left ) { - CaptureList *left_captures = capture_list_pool_get( + const CaptureList *left_captures = capture_list_pool_get( &self->capture_list_pool, left_state->capture_list_id ); - CaptureList *right_captures = capture_list_pool_get( + const CaptureList 
*right_captures = capture_list_pool_get( &self->capture_list_pool, right_state->capture_list_id ); @@ -1360,40 +1371,18 @@ static bool ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - - uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. - if (list_id == NONE) { - uint32_t state_index, byte_offset, pattern_index; - if (ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index - )) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - list_id = self->states.contents[state_index].capture_list_id; - array_erase(&self->states, state_index); - } else { - LOG(" too many finished states.\n"); - return false; - } + if (self->states.size >= MAX_STATE_COUNT) { + LOG(" too many states"); + return false; } - LOG( " start state. pattern:%u, step:%u\n", pattern->pattern_index, pattern->step_index ); + QueryStep *step = &self->query->steps.contents[pattern->step_index]; array_push(&self->states, ((QueryState) { - .capture_list_id = list_id, + .capture_list_id = NONE, .step_index = pattern->step_index, .pattern_index = pattern->pattern_index, .start_depth = self->depth - step->depth, @@ -1409,23 +1398,34 @@ static QueryState *ts_query__cursor_copy_state( TSQueryCursor *self, const QueryState *state ) { - uint32_t new_list_id = capture_list_pool_acquire(&self->capture_list_pool); - if (new_list_id == NONE) return NULL; - uint32_t index = (state - self->states.contents) + 1; + if (self->states.size >= MAX_STATE_COUNT) { + LOG(" too many states"); + return NULL; + } + + // If the state has captures, copy its capture list. 
QueryState copy = *state; + copy.capture_list_id = state->capture_list_id; + if (state->capture_list_id != NONE) { + copy.capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + if (copy.capture_list_id == NONE) { + LOG(" too many capture lists"); + return NULL; + } + const CaptureList *old_captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + CaptureList *new_captures = capture_list_pool_get_mut( + &self->capture_list_pool, + copy.capture_list_id + ); + array_push_all(new_captures, old_captures); + } + + uint32_t index = (state - self->states.contents) + 1; array_insert(&self->states, index, copy); - QueryState *new_state = &self->states.contents[index]; - new_state->capture_list_id = new_list_id; - CaptureList *old_captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - CaptureList *new_captures = capture_list_pool_get( - &self->capture_list_pool, - new_list_id - ); - array_push_all(new_captures, old_captures); - return new_state; + return &self->states.contents[index]; } // Walk the tree, processing patterns until at least one pattern finishes, @@ -1562,10 +1562,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // Update all of the in-progress states with current node. - for (unsigned i = 0; i < self->states.size; i++) { + for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) { QueryState *state = &self->states.contents[i]; QueryStep *step = &self->query->steps.contents[state->step_index]; state->has_in_progress_alternatives = false; + copy_count = 0; // Check that the node matches all of the criteria for the next // step of the pattern. @@ -1629,25 +1630,61 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->pattern_index, state->step_index ); - i++; + copy_count++; } } // If the current node is captured in this pattern, add it to the capture list. 
- CaptureList *capture_list = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id - ); - for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { - uint16_t capture_id = step->capture_ids[j]; - if (step->capture_ids[j] == NONE) break; - array_push(capture_list, ((TSQueryCapture) { node, capture_id })); - LOG( - " capture node. pattern:%u, capture_id:%u, capture_count:%u\n", - state->pattern_index, - capture_id, - capture_list->size + // For the first capture in a pattern, lazily acquire a capture list. + if (step->capture_ids[0] != NONE) { + if (state->capture_list_id == NONE) { + state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. + if (state->capture_list_id == NONE) { + uint32_t state_index, byte_offset, pattern_index; + if (ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index + )) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + state->capture_list_id = self->states.contents[state_index].capture_list_id; + array_erase(&self->states, state_index); + if (state_index < i) { + i--; + state--; + } + } else { + LOG(" too many finished states.\n"); + array_erase(&self->states, i); + i--; + continue; + } + } + } + + CaptureList *capture_list = capture_list_pool_get_mut( + &self->capture_list_pool, + state->capture_list_id ); + for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { + uint16_t capture_id = step->capture_ids[j]; + if (step->capture_ids[j] == NONE) break; + array_push(capture_list, ((TSQueryCapture) { node, capture_id })); + LOG( + " capture node. pattern:%u, capture_id:%u, capture_count:%u\n", + state->pattern_index, + capture_id, + capture_list->size + ); + } } // Advance this state to the next step of its pattern. 
@@ -1663,9 +1700,8 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // or is the end of a repetition), then copy the state in order to pursue both // alternatives. The alternative step itself may have an alternative, so this is // an interative process. - unsigned start_index = state - self->states.contents; - unsigned end_index = start_index + 1; - for (unsigned j = start_index; j < end_index; j++) { + unsigned end_index = i + 1; + for (unsigned j = i; j < end_index; j++) { QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->alternative_index != NONE) { @@ -1675,7 +1711,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { j--; } if (copy) { - i++; + copy_count++; end_index++; copy->step_index = next_step->alternative_index; if (next_step->alternative_is_immediate) { @@ -1787,7 +1823,7 @@ bool ts_query_cursor_next_match( QueryState *state = &self->finished_states.contents[0]; match->id = state->id; match->pattern_index = state->pattern_index; - CaptureList *captures = capture_list_pool_get( + const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); @@ -1829,8 +1865,8 @@ bool ts_query_cursor_next_capture( // First, identify the position of the earliest capture in an unfinished // match. For a finished capture to be returned, it must be *before* // this position. 
- uint32_t first_unfinished_capture_byte = UINT32_MAX; - uint32_t first_unfinished_pattern_index = UINT32_MAX; + uint32_t first_unfinished_capture_byte; + uint32_t first_unfinished_pattern_index; uint32_t first_unfinished_state_index; ts_query_cursor__first_in_progress_capture( self, @@ -1845,7 +1881,7 @@ bool ts_query_cursor_next_capture( uint32_t first_finished_pattern_index = first_unfinished_pattern_index; for (unsigned i = 0; i < self->finished_states.size; i++) { const QueryState *state = &self->finished_states.contents[i]; - CaptureList *captures = capture_list_pool_get( + const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); @@ -1883,7 +1919,7 @@ bool ts_query_cursor_next_capture( ]; match->id = state->id; match->pattern_index = state->pattern_index; - CaptureList *captures = capture_list_pool_get( + const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); From e8e80b1cf1bc2ec4f6663bbcfbdfa0ee039d9de6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 19 May 2020 16:26:04 -0700 Subject: [PATCH 042/282] docs: Use new predicate syntax in highlighting query examples --- docs/section-4-syntax-highlighting.md | 4 ++-- highlight/src/lib.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index 85ccfb6242..0182704b5d 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -224,7 +224,7 @@ The capture names are as follows: When highlighting a file, Tree-sitter will keep track of the set of scopes that contains any given position, and the set of definitions within each scope. When processing a syntax node that is captured as a `local.reference`, Tree-sitter will try to find a definition for a name that matches the node's text. 
If it finds a match, Tree-sitter will ensure that the *reference* and the *definition* are colored the same. -The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes which have been identified as local variables by adding the predicate `(is-not? local)` to the pattern. This is used in the example below: +The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes which have been identified as local variables by adding the predicate `(#is-not? local)` to the pattern. This is used in the example below: #### Example Input @@ -299,7 +299,7 @@ Let's write some queries that let us clearly distinguish between these types of (block_parameters (identifier) @variable.parameter) ((identifier) @function.method - (is-not? local)) + (#is-not? local)) ``` Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*: diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 6f1b7bbdbf..d2e27b46df 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -1055,7 +1055,7 @@ fn injection_for_match<'a>( for prop in query.property_settings(query_match.pattern_index) { match prop.key.as_ref() { // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `set!` predicate + // captured node, it can also be hard-coded via a `#set!` predicate // that sets the injection.language key. "injection.language" => { if language_name.is_none() { @@ -1065,7 +1065,7 @@ fn injection_for_match<'a>( // By default, injections do not include the *children* of an // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `set!` predicate that + // node itself. 
This can be changed using a `#set!` predicate that // sets the `injection.include-children` key. "injection.include-children" => include_children = true, _ => {} From 911fb7f1b2e746f2b103973b89bec89841bb1216 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 May 2020 13:39:11 -0700 Subject: [PATCH 043/282] Extract helper functions to reduce the code size of the lexer function (#626) * Extract helper functions to reduce code size of ts_lex * Name char set helper functions based on token name --- cli/src/generate/nfa.rs | 77 +++++++++ cli/src/generate/render.rs | 314 +++++++++++++++++++++---------------- 2 files changed, 257 insertions(+), 134 deletions(-) diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index abab873964..bf9ca58d93 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -1,8 +1,10 @@ use std::char; use std::cmp::max; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; use std::mem::swap; +use std::ops::Range; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum CharacterSet { @@ -178,6 +180,40 @@ impl CharacterSet { } } + pub fn ranges<'a>( + chars: &'a Vec, + ruled_out_characters: &'a HashSet, + ) -> impl Iterator> + 'a { + let mut prev_range: Option> = None; + chars + .iter() + .map(|c| (*c, false)) + .chain(Some(('\0', true))) + .filter_map(move |(c, done)| { + if done { + return prev_range.clone(); + } + if ruled_out_characters.contains(&(c as u32)) { + return None; + } + if let Some(range) = prev_range.clone() { + let mut prev_range_successor = range.end as u32 + 1; + while prev_range_successor < c as u32 { + if !ruled_out_characters.contains(&prev_range_successor) { + prev_range = Some(c..c); + return Some(range); + } + prev_range_successor += 1; + } + prev_range = Some(range.start..c); + None + } else { + prev_range = Some(c..c); + None + } + }) + } + #[cfg(test)] pub fn contains(&self, c: char) -> bool { match self { @@ -825,4 +861,45 @@ mod tests { assert!(a.does_intersect(&b)); 
assert!(b.does_intersect(&a)); } + + #[test] + fn test_character_set_get_ranges() { + struct Row { + chars: Vec, + ruled_out_chars: Vec, + expected_ranges: Vec>, + } + + let table = [ + Row { + chars: vec!['a'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'a'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'z'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], + ruled_out_chars: vec!['d', 'f', 'g'], + expected_ranges: vec!['a'..'h', 'z'..'z'], + }, + ]; + + for Row { + chars, + ruled_out_chars, + expected_ranges, + } in table.iter() + { + let ruled_out_chars = ruled_out_chars + .into_iter() + .map(|c: &char| *c as u32) + .collect(); + let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + assert_eq!(ranges, *expected_ranges); + } + } } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index d6191d9d79..f4a4bfc49f 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -15,6 +15,8 @@ use std::mem::swap; // stabilized, and the parser generation does not use it by default. const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1; +const LARGE_CHARACTER_RANGE_COUNT: usize = 8; + macro_rules! 
add { ($this: tt, $($arg: tt)*) => {{ $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); @@ -72,6 +74,12 @@ struct Generator { next_abi: bool, } +struct TransitionSummary { + is_included: bool, + ranges: Vec>, + call_id: Option, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -99,12 +107,12 @@ impl Generator { let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); - self.add_lex_function("ts_lex", main_lex_table); + self.add_lex_function("ts_lex", main_lex_table, true); if self.keyword_capture_token.is_some() { let mut keyword_lex_table = LexTable::default(); swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - self.add_lex_function("ts_lex_keywords", keyword_lex_table); + self.add_lex_function("ts_lex_keywords", keyword_lex_table, false); } self.add_lex_modes_list(); @@ -570,7 +578,100 @@ impl Generator { add_line!(self, ""); } - fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { + fn add_lex_function( + &mut self, + name: &str, + lex_table: LexTable, + extract_helper_functions: bool, + ) { + let mut ruled_out_chars = HashSet::new(); + let mut large_character_sets = Vec::<(Symbol, usize, Vec>)>::new(); + + // For each lex state, compute a summary of the code that needs to be + // generated. + let state_transition_summaries: Vec> = lex_table + .states + .iter() + .map(|state| { + ruled_out_chars.clear(); + + // For each state transition, compute the set of character ranges + // that need to be checked. 
+ state + .advance_actions + .iter() + .map(|(chars, action)| { + let (chars, is_included) = match chars { + CharacterSet::Include(c) => (c, true), + CharacterSet::Exclude(c) => (c, false), + }; + let mut call_id = None; + let mut ranges = + CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + if is_included { + ruled_out_chars.extend(chars.iter().map(|c| *c as u32)); + } else { + ranges.insert(0, '\0'..'\0') + } + + // Record any large character sets so that they can be extracted + // into helper functions, reducing code duplication. + if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { + let char_set_symbol = self + .symbol_for_advance_action(action, &lex_table) + .expect("No symbol for lex state"); + let mut count_for_symbol = 0; + for (i, (symbol, _, r)) in large_character_sets.iter().enumerate() { + if r == &ranges { + call_id = Some(i); + break; + } + if *symbol == char_set_symbol { + count_for_symbol += 1; + } + } + if call_id.is_none() { + call_id = Some(large_character_sets.len()); + large_character_sets.push(( + char_set_symbol, + count_for_symbol + 1, + ranges.clone(), + )); + } + } + + TransitionSummary { + is_included, + ranges, + call_id, + } + }) + .collect() + }) + .collect(); + + // Generate a helper function for each large character set. 
+ let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); + sorted_large_char_sets.sort_unstable_by_key(|(sym, count, _)| (sym, count)); + for (sym, count, ranges) in sorted_large_char_sets { + add_line!( + self, + "static inline bool {}_character_set_{}(int32_t lookahead) {{", + self.symbol_ids[sym], + count + ); + indent!(self); + add_line!(self, "return"); + indent!(self); + add_whitespace!(self); + self.add_character_range_conditions(ranges, true, 0); + add!(self, ";\n"); + dedent!(self); + dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } + add_line!( self, "static bool {}(TSLexer *lexer, TSStateId state) {{", @@ -591,7 +692,7 @@ impl Generator { for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(state); + self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); dedent!(self); } @@ -607,7 +708,35 @@ impl Generator { add_line!(self, ""); } - fn add_lex_state(&mut self, state: LexState) { + fn symbol_for_advance_action( + &self, + action: &AdvanceAction, + lex_table: &LexTable, + ) -> Option { + let mut state_ids = vec![action.state]; + let mut i = 0; + while i < state_ids.len() { + let id = state_ids[i]; + let state = &lex_table.states[id]; + if let Some(accept) = state.accept_action { + return Some(accept); + } + for (_, action) in &state.advance_actions { + if !state_ids.contains(&action.state) { + state_ids.push(action.state); + } + } + i += 1; + } + return None; + } + + fn add_lex_state( + &mut self, + state: LexState, + transition_info: &Vec, + large_character_sets: &Vec<(Symbol, usize, Vec>)>, + ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } @@ -616,92 +745,89 @@ impl Generator { add_line!(self, "if (eof) ADVANCE({});", eof_action.state); } - let mut ruled_out_characters = HashSet::new(); - for (characters, action) in 
state.advance_actions { - let previous_length = self.buffer.len(); - + for (i, (_, action)) in state.advance_actions.into_iter().enumerate() { + let transition = &transition_info[i]; add_whitespace!(self); - add!(self, "if ("); - if self.add_character_set_condition(&characters, &ruled_out_characters) { - add!(self, ") "); - self.add_advance_action(&action); - if let CharacterSet::Include(chars) = characters { - ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); + + // If there is a helper function for this transition's character + // set, then generate a call to that helper function. + if let Some(call_id) = transition.call_id { + add!(self, "if ("); + if !transition.is_included { + add!(self, "!"); } - } else { - self.buffer.truncate(previous_length); - self.add_advance_action(&action); + let (symbol, count, _) = &large_character_sets[call_id]; + add!( + self, + "{}_character_set_{}(lookahead)) ", + self.symbol_ids[symbol], + count + ); } + // Otherwise, generate code to compare the lookahead character + // with all of the character ranges. 
+ else if transition.ranges.len() > 0 { + add!(self, "if ("); + self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); + add!(self, ") "); + } + self.add_advance_action(&action); add!(self, "\n"); } add_line!(self, "END_STATE();"); } - fn add_character_set_condition( + fn add_character_range_conditions( &mut self, - characters: &CharacterSet, - ruled_out_characters: &HashSet, + ranges: &[Range], + is_included: bool, + indent_count: usize, ) -> bool { - match characters { - CharacterSet::Include(chars) => { - let ranges = Self::get_ranges(chars, ruled_out_characters); - self.add_character_range_conditions(ranges, false) - } - CharacterSet::Exclude(chars) => { - let ranges = Some('\0'..'\0') - .into_iter() - .chain(Self::get_ranges(chars, ruled_out_characters)); - self.add_character_range_conditions(ranges, true) - } + let mut line_break = "\n".to_string(); + for _ in 0..self.indent_level + indent_count { + line_break.push_str(" "); } - } - fn add_character_range_conditions( - &mut self, - ranges: impl Iterator>, - is_negated: bool, - ) -> bool { - let line_break = "\n "; let mut did_add = false; for range in ranges { - if is_negated { + if is_included { if did_add { - add!(self, " &&{}", line_break); + add!(self, " ||{}", line_break); } if range.end == range.start { - add!(self, "lookahead != "); + add!(self, "lookahead == "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead != "); + add!(self, "lookahead == "); self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); + add!(self, " ||{}lookahead == ", line_break); self.add_character(range.end); } else { - add!(self, "(lookahead < "); + add!(self, "("); self.add_character(range.start); - add!(self, " || "); + add!(self, " <= lookahead && lookahead <= "); self.add_character(range.end); - add!(self, " < lookahead)"); + add!(self, ")"); } } else { if did_add { - add!(self, " ||{}", line_break); + 
add!(self, " &&{}", line_break); } if range.end == range.start { - add!(self, "lookahead == "); + add!(self, "lookahead != "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead == "); + add!(self, "lookahead != "); self.add_character(range.start); - add!(self, " ||{}lookahead == ", line_break); + add!(self, " &&{}lookahead != ", line_break); self.add_character(range.end); } else { - add!(self, "("); + add!(self, "(lookahead < "); self.add_character(range.start); - add!(self, " <= lookahead && lookahead <= "); + add!(self, " || "); self.add_character(range.end); - add!(self, ")"); + add!(self, " < lookahead)"); } } did_add = true; @@ -709,40 +835,6 @@ impl Generator { did_add } - fn get_ranges<'a>( - chars: &'a Vec, - ruled_out_characters: &'a HashSet, - ) -> impl Iterator> + 'a { - let mut prev_range: Option> = None; - chars - .iter() - .map(|c| (*c, false)) - .chain(Some(('\0', true))) - .filter_map(move |(c, done)| { - if done { - return prev_range.clone(); - } - if ruled_out_characters.contains(&(c as u32)) { - return None; - } - if let Some(range) = prev_range.clone() { - let mut prev_range_successor = range.end as u32 + 1; - while prev_range_successor < c as u32 { - if !ruled_out_characters.contains(&prev_range_successor) { - prev_range = Some(c..c); - return Some(range); - } - prev_range_successor += 1; - } - prev_range = Some(range.start..c); - None - } else { - prev_range = Some(c..c); - None - } - }) - } - fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { add!(self, "ADVANCE({});", action.state); @@ -1436,49 +1528,3 @@ pub(crate) fn render_c_code( } .generate() } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_char_ranges() { - struct Row { - chars: Vec, - ruled_out_chars: Vec, - expected_ranges: Vec>, - } - - let table = [ - Row { - chars: vec!['a'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'a'], - }, - Row { - chars: 
vec!['a', 'b', 'c', 'e', 'z'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], - ruled_out_chars: vec!['d', 'f', 'g'], - expected_ranges: vec!['a'..'h', 'z'..'z'], - }, - ]; - - for Row { - chars, - ruled_out_chars, - expected_ranges, - } in table.iter() - { - let ruled_out_chars = ruled_out_chars - .into_iter() - .map(|c: &char| *c as u32) - .collect(); - let ranges = Generator::get_ranges(chars, &ruled_out_chars).collect::>(); - assert_eq!(ranges, *expected_ranges); - } - } -} From ec870e9e66c34354133ad865dd12fbaceb021083 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 May 2020 16:37:45 -0700 Subject: [PATCH 044/282] Avoid extracting helpers for char sets that are only used once --- cli/src/generate/render.rs | 95 ++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f4a4bfc49f..270bd00d4d 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -80,6 +80,13 @@ struct TransitionSummary { call_id: Option, } +struct LargeCharacterSetInfo { + ranges: Vec>, + symbol: Symbol, + index: usize, + usage_count: usize, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -585,7 +592,7 @@ impl Generator { extract_helper_functions: bool, ) { let mut ruled_out_chars = HashSet::new(); - let mut large_character_sets = Vec::<(Symbol, usize, Vec>)>::new(); + let mut large_character_sets = Vec::::new(); // For each lex state, compute a summary of the code that needs to be // generated. 
@@ -621,22 +628,24 @@ impl Generator { .symbol_for_advance_action(action, &lex_table) .expect("No symbol for lex state"); let mut count_for_symbol = 0; - for (i, (symbol, _, r)) in large_character_sets.iter().enumerate() { - if r == &ranges { + for (i, info) in large_character_sets.iter_mut().enumerate() { + if info.ranges == ranges { call_id = Some(i); + info.usage_count += 1; break; } - if *symbol == char_set_symbol { + if info.symbol == char_set_symbol { count_for_symbol += 1; } } if call_id.is_none() { call_id = Some(large_character_sets.len()); - large_character_sets.push(( - char_set_symbol, - count_for_symbol + 1, - ranges.clone(), - )); + large_character_sets.push(LargeCharacterSetInfo { + symbol: char_set_symbol, + index: count_for_symbol + 1, + ranges: ranges.clone(), + usage_count: 1, + }); } } @@ -652,24 +661,26 @@ impl Generator { // Generate a helper function for each large character set. let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); - sorted_large_char_sets.sort_unstable_by_key(|(sym, count, _)| (sym, count)); - for (sym, count, ranges) in sorted_large_char_sets { - add_line!( - self, - "static inline bool {}_character_set_{}(int32_t lookahead) {{", - self.symbol_ids[sym], - count - ); - indent!(self); - add_line!(self, "return"); - indent!(self); - add_whitespace!(self); - self.add_character_range_conditions(ranges, true, 0); - add!(self, ";\n"); - dedent!(self); - dedent!(self); - add_line!(self, "}}"); - add_line!(self, ""); + sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); + for info in sorted_large_char_sets { + if info.usage_count > 1 { + add_line!( + self, + "static inline bool {}_character_set_{}(int32_t lookahead) {{", + self.symbol_ids[&info.symbol], + info.index + ); + indent!(self); + add_line!(self, "return"); + indent!(self); + add_whitespace!(self); + self.add_character_range_conditions(&info.ranges, true, 0); + add!(self, ";\n"); + dedent!(self); + 
dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } } add_line!( @@ -735,7 +746,7 @@ impl Generator { &mut self, state: LexState, transition_info: &Vec, - large_character_sets: &Vec<(Symbol, usize, Vec>)>, + large_character_sets: &Vec, ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); @@ -752,21 +763,27 @@ impl Generator { // If there is a helper function for this transition's character // set, then generate a call to that helper function. if let Some(call_id) = transition.call_id { - add!(self, "if ("); - if !transition.is_included { - add!(self, "!"); + let info = &large_character_sets[call_id]; + if info.usage_count > 1 { + add!(self, "if ("); + if !transition.is_included { + add!(self, "!"); + } + add!( + self, + "{}_character_set_{}(lookahead)) ", + self.symbol_ids[&info.symbol], + info.index + ); + self.add_advance_action(&action); + add!(self, "\n"); + continue; } - let (symbol, count, _) = &large_character_sets[call_id]; - add!( - self, - "{}_character_set_{}(lookahead)) ", - self.symbol_ids[symbol], - count - ); } + // Otherwise, generate code to compare the lookahead character // with all of the character ranges. 
- else if transition.ranges.len() > 0 { + if transition.ranges.len() > 0 { add!(self, "if ("); self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); add!(self, ") "); From 67cd6b3d972eddc857a255afcb564cd967f23cf4 Mon Sep 17 00:00:00 2001 From: Mohamed Al-Fahim <31521089+MohamedAlFahim@users.noreply.github.com> Date: Sat, 30 May 2020 10:57:44 -0700 Subject: [PATCH 045/282] Grammar fix (#631) --- docs/section-3-creating-parsers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index dc7285f544..55ebc13d7c 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -18,7 +18,7 @@ In order to develop a Tree-sitter parser, there are two dependencies that you ne ### Installation -To create a Tree-sitter parser, you need to use the [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: +To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: * Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This is the recommended approach, and it is discussed further in the next section. * Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`. @@ -204,7 +204,7 @@ The following is a complete list of built-in functions you can use in your `gram * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation. * **Repetitions : `repeat(rule)`** - This function creates a rule that matches *zero-or-more* occurrences of a given rule. It is analogous to the `{x}` (curly brace) syntax in EBNF notation. 
* **Repetitions : `repeat1(rule)`** - This function creates a rule that matches *one-or-more* occurrences of a given rule. The previous `repeat` rule is implemented in terms of `repeat1` but is included because it is very commonly used. -* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule it is analogous to the `[x]` (square bracket) syntax in EBNF notation. +* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule. It is analogous to the `[x]` (square bracket) syntax in EBNF notation. * **Precedence : `prec(number, rule)`** - This function marks the given rule with a numerical precedence which will be used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the [precedence directives][yacc-prec] in Yacc grammars. * **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars. * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. 
From 35cc10768b60eec9635072c326d550451e5d0d90 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 09:32:53 -0700 Subject: [PATCH 046/282] Add BLM banner --- docs/index.html | 128 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 docs/index.html diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000000..91082812f3 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,128 @@ + + + + + + Black Lives Matter + + + + + + + +
+
+

Black Lives Matter

+

We stand in solidarity with George Floyd, Natosha McDade, Yassin Mohamed, Finan H. Berhe, Sean Reed, Steven Demarco Taylor, Breonna Taylor, Ariane McCree, Terrance Franklin, Miles Hall, Darius Tarver, William Green, Samuel David Mallard, Kwame Jones, De’von Bailey, Christopher Whitfield, Anthony Hill, De’Von Bailey, Eric Logan, Jamarion Robinson, Gregory Hill Jr, JaQuavion Slaton, Ryan Twyman, Brandon Webber, Jimmy Atchison, Willie McCoy, Emantic Fitzgerald Bradford J, D’ettrick Griffin, Jemel Roberson, DeAndre Ballard, Botham Shem Jean, Robert Lawrence White, Anthony Lamar Smith, Ramarley Graham, Manuel Loggins Jr, Trayvon Martin, Wendell Allen, Kendrec McDade, Larry Jackson Jr, Jonathan Ferrell, Jordan Baker, Victor White III, Dontre Hamilton, Eric Garner, John Crawford III, Michael Brown, Ezell Ford, Dante Parker, Kajieme Powell, Laquan McDonald, Akai Gurley, Tamir Rice, Rumain Brisbon, Jerame Reid, Charly Keunang, Tony Robinson, Walter Scott, Freddie Gray, Brendon Glenn, Samuel DuBose, Christian Taylor, Jamar Clark, Mario Woods, Quintonio LeGrier, Gregory Gunn, Akiel Denkins, Alton Sterling, Philando Castile, Terrence Sterling, Terence Crutcher, Keith Lamont Scott, Alfred Olango, Jordan Edwards, Stephon Clark, Danny Ray Thomas, DeJuan Guillory, Patrick Harmon, Jonathan Hart, Maurice Granton, Julius Johnson, Jamee Johnson, Michael Dean...

+
+ + +
+ + From 47d607da8dac8693d5c93e7705f161fd46d9349f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 13:23:07 -0700 Subject: [PATCH 047/282] Add alternative syntax in queries (#630) * Add alternative syntax in queries * Add tests and tweak error handling for alternatives in queries --- cli/src/tests/query_test.rs | 141 +++++++++++++++++++++++++++++++++- lib/src/query.c | 149 ++++++++++++++++++++++++++++-------- 2 files changed, 252 insertions(+), 38 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5dbfea18c0..5499048e2c 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -87,6 +87,17 @@ fn test_query_errors_on_invalid_syntax() { .join("\n") )) ); + assert_eq!( + Query::new(language, r#"((identifier) [])"#), + Err(QueryError::Syntax( + 1, + [ + "((identifier) [])", // + " ^", + ] + .join("\n") + )) + ); assert_eq!( Query::new(language, r#"((identifier) (#a)"#), Err(QueryError::Syntax( @@ -367,8 +378,9 @@ fn test_query_matches_with_many_overlapping_results() { function: (identifier) @function) ((identifier) @constant (#match? 
@constant "[A-Z\\d_]+")) - "# - ).unwrap(); + "#, + ) + .unwrap(); let count = 80; @@ -388,8 +400,13 @@ fn test_query_matches_with_many_overlapping_results() { &[ (0, vec![("method", "foo")]), (1, vec![("function", "bar")]), - (2, vec![("constant", "BAZ")]) - ].iter().cloned().cycle().take(3 * count).collect::>(), + (2, vec![("constant", "BAZ")]), + ] + .iter() + .cloned() + .cycle() + .take(3 * count) + .collect::>(), ); }); } @@ -875,6 +892,122 @@ fn test_query_matches_with_repeated_internal_nodes() { }) } +#[test] +fn test_query_matches_with_simple_alternatives() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: [(property_identifier) (string)] @key + value: [(function) @val1 (arrow_function) @val2]) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: c, + 'd': e => f, + g: { + h: function i() {}, + 'x': null, + j: _ => k + }, + 'l': function m() {}, + }; + ", + &[ + (0, vec![("key", "'d'"), ("val2", "e => f")]), + (0, vec![("key", "h"), ("val1", "function i() {}")]), + (0, vec![("key", "j"), ("val2", "_ => k")]), + (0, vec![("key", "'l'"), ("val1", "function m() {}")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_in_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + [(identifier) (string)] @el + . + ( + "," + . 
+ [(identifier) (string)] @el + )*) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = [b, 'c', d, 1, e, 'f', 'g', h]; + ", + &[ + (0, vec![("el", "b"), ("el", "'c'"), ("el", "d")]), + ( + 0, + vec![("el", "e"), ("el", "'f'"), ("el", "'g'"), ("el", "h")], + ), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_at_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + [ + "if" + "else" + "function" + "throw" + "return" + ] @keyword + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a(b, c, d) { + if (b) { + return c; + } else { + throw d; + } + } + ", + &[ + (0, vec![("keyword", "function")]), + (0, vec![("keyword", "if")]), + (0, vec![("keyword", "return")]), + (0, vec![("keyword", "else")]), + (0, vec![("keyword", "throw")]), + ], + ); + }) +} + #[test] fn test_query_matches_in_language_with_simple_aliases() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 89b5e4b575..1999606689 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -8,6 +8,9 @@ #include "./unicode.h" #include +// #define LOG(...) fprintf(stderr, __VA_ARGS__) +#define LOG(...) + #define MAX_STATE_COUNT 256 #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 @@ -32,9 +35,8 @@ typedef struct { * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name * was not specified. - * - `capture_id` - An integer representing the name of the capture associated - * with this node in the pattern. A `NONE` value means this node is not - * captured in this pattern. + * - `capture_ids` - An array of integers representing the names of captures + * associated with this node in the pattern, terminated by a `NONE` value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. 
* - `alternative_index` - The index of a different query step that serves as @@ -49,8 +51,9 @@ typedef struct { bool contains_captures: 1; bool is_pattern_start: 1; bool is_immediate: 1; - bool is_last: 1; - bool is_placeholder: 1; + bool is_last_child: 1; + bool is_pass_through: 1; + bool is_dead_end: 1; bool alternative_is_immediate: 1; } QueryStep; @@ -177,9 +180,6 @@ static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1; -// #define LOG(...) fprintf(stderr, __VA_ARGS__) -#define LOG(...) - /********** * Stream **********/ @@ -447,9 +447,10 @@ static QueryStep query_step__new( .capture_ids = {NONE, NONE, NONE}, .alternative_index = NONE, .contains_captures = false, - .is_last = false, + .is_last_child = false, .is_pattern_start = false, - .is_placeholder = false, + .is_pass_through = false, + .is_dead_end = false, .is_immediate = is_immediate, .alternative_is_immediate = false, }; @@ -714,15 +715,60 @@ static TSQueryError ts_query__parse_pattern( uint32_t *capture_count, bool is_immediate ) { - uint16_t starting_step_index = self->steps.size; + uint32_t starting_step_index = self->steps.size; if (stream->next == 0) return TSQueryErrorSyntax; - // Finish the parent S-expression - if (stream->next == ')') { + // Finish the parent S-expression. + if (stream->next == ')' || stream->next == ']') { return PARENT_DONE; } + // An open bracket is the start of an alternation. + else if (stream->next == '[') { + stream_advance(stream); + stream_skip_whitespace(stream); + + // Parse each branch, and add a placeholder step in between the branches. 
+ Array(uint32_t) branch_step_indices = array_new(); + for (;;) { + uint32_t start_index = self->steps.size; + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + capture_count, + is_immediate + ); + + if (e == PARENT_DONE && stream->next == ']' && branch_step_indices.size > 0) { + stream_advance(stream); + break; + } else if (e) { + array_delete(&branch_step_indices); + return e; + } + + array_push(&branch_step_indices, start_index); + array_push(&self->steps, query_step__new(0, depth, false)); + } + array_pop(&self->steps); + + // For all of the branches except for the last one, add the subsequent branch as an + // alternative, and link the end of the branch to the current end of the steps. + for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { + uint32_t step_index = branch_step_indices.contents[i]; + uint32_t next_step_index = branch_step_indices.contents[i + 1]; + QueryStep *start_step = &self->steps.contents[step_index]; + QueryStep *end_step = &self->steps.contents[next_step_index - 1]; + start_step->alternative_index = next_step_index; + end_step->alternative_index = self->steps.size; + end_step->is_dead_end = true; + } + + array_delete(&branch_step_indices); + } + // An open parenthesis can be the start of three possible constructs: // * A grouped sequence // * A predicate @@ -732,7 +778,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // If this parenthesis is followed by a node, then it represents a grouped sequence. 
- if (stream->next == '(' || stream->next == '"') { + if (stream->next == '(' || stream->next == '"' || stream->next == '[') { bool child_is_immediate = false; for (;;) { if (stream->next == '.') { @@ -747,7 +793,7 @@ static TSQueryError ts_query__parse_pattern( capture_count, child_is_immediate ); - if (e == PARENT_DONE) { + if (e == PARENT_DONE && stream->next == ')') { stream_advance(stream); break; } else if (e) { @@ -828,9 +874,9 @@ static TSQueryError ts_query__parse_pattern( capture_count, child_is_immediate ); - if (e == PARENT_DONE) { + if (e == PARENT_DONE && stream->next == ')') { if (child_is_immediate) { - self->steps.contents[child_start_step_index].is_last = true; + self->steps.contents[child_start_step_index].is_last_child = true; } stream_advance(stream); break; @@ -939,42 +985,54 @@ static TSQueryError ts_query__parse_pattern( for (;;) { QueryStep *step = &self->steps.contents[starting_step_index]; + // Parse the one-or-more operator. if (stream->next == '+') { stream_advance(stream); stream_skip_whitespace(stream); + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); repeat_step.alternative_index = starting_step_index; - repeat_step.is_placeholder = true; + repeat_step.is_pass_through = true; repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); } - else if (stream->next == '?') { - stream_advance(stream); - stream_skip_whitespace(stream); - step->alternative_index = self->steps.size; - } - + // Parse the zero-or-more repetition operator. 
else if (stream->next == '*') { stream_advance(stream); stream_skip_whitespace(stream); + QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); repeat_step.alternative_index = starting_step_index; - repeat_step.is_placeholder = true; + repeat_step.is_pass_through = true; repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); + + while (step->alternative_index != NONE) { + step = &self->steps.contents[step->alternative_index]; + } + step->alternative_index = self->steps.size; + } + + // Parse the optional operator. + else if (stream->next == '?') { + stream_advance(stream); + stream_skip_whitespace(stream); + + while (step->alternative_index != NONE) { + step = &self->steps.contents[step->alternative_index]; + } step->alternative_index = self->steps.size; } // Parse an '@'-prefixed capture pattern else if (stream->next == '@') { stream_advance(stream); - - // Parse the capture name if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *capture_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - capture_name; + stream_skip_whitespace(stream); // Add the capture id to the first step of the pattern uint16_t capture_id = symbol_table_insert_name( @@ -982,10 +1040,22 @@ static TSQueryError ts_query__parse_pattern( capture_name, length ); - query_step__add_capture(step, capture_id); - (*capture_count)++; - stream_skip_whitespace(stream); + for (;;) { + query_step__add_capture(step, capture_id); + if ( + step->alternative_index != NONE && + step->alternative_index > starting_step_index && + step->alternative_index < self->steps.size + ) { + starting_step_index = step->alternative_index; + step = &self->steps.contents[starting_step_index]; + } else { + break; + } + } + + (*capture_count)++; } // No more suffix modifiers @@ -1062,6 +1132,7 @@ TSQuery *ts_query_new( // If any pattern could not be parsed, then report the error information // and terminate. 
if (*error_type) { + if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; *error_offset = stream.input - source; ts_query_delete(self); return NULL; @@ -1086,6 +1157,9 @@ TSQuery *ts_query_new( if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; } + + // If there are alternatives or options at the root of the pattern, + // then add multiple entries to the pattern map. if (step->alternative_index != NONE) { start_step_index = step->alternative_index; } else { @@ -1583,7 +1657,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if ((step->is_immediate && is_named) || state->seeking_immediate_match) { later_sibling_can_match = false; } - if (step->is_last && can_have_later_siblings) { + if (step->is_last_child && can_have_later_siblings) { node_does_match = false; } if (step->field) { @@ -1705,8 +1779,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->alternative_index != NONE) { + if (next_step->is_dead_end) { + state->step_index = next_step->alternative_index; + j--; + continue; + } + QueryState *copy = ts_query__cursor_copy_state(self, state); - if (next_step->is_placeholder) { + if (next_step->is_pass_through) { state->step_index++; j--; } @@ -1718,10 +1798,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { copy->seeking_immediate_match = true; } LOG( - " split state for branch. pattern:%u, step:%u, step:%u\n", + " split state for branch. 
pattern:%u, step:%u, step:%u, immediate:%d\n", copy->pattern_index, state->step_index, - copy->step_index + copy->step_index, + copy->seeking_immediate_match ); } } From 28a779d6a0b4b84f8bf81390198d7d0f619557b9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 13:28:52 -0700 Subject: [PATCH 048/282] wasm: Allow arbitrary predicates in queries --- lib/binding_web/binding.js | 27 +++++++++++----- lib/binding_web/test/query-test.js | 49 ++++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index eb6d4d8aad..567b7eb317 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -44,6 +44,8 @@ class Parser { delete() { C._ts_parser_delete(this[0]); C._free(this[1]); + this[0] = 0; + this[1] = 0; } setLanguage(language) { @@ -163,6 +165,7 @@ class Tree { delete() { C._ts_tree_delete(this[0]); + this[0] = 0; } edit(edit) { @@ -506,6 +509,7 @@ class TreeCursor { delete() { marshalTreeCursor(this); C._ts_tree_cursor_delete_wasm(this.tree[0]); + this[0] = this[1] = this[2] = 0; } reset(node) { @@ -716,6 +720,7 @@ class Language { const assertedProperties = new Array(patternCount); const refutedProperties = new Array(patternCount); const predicates = new Array(patternCount); + const textPredicates = new Array(patternCount); for (let i = 0; i < patternCount; i++) { const predicatesAddress = C._ts_query_predicates_for_pattern( address, @@ -725,6 +730,7 @@ class Language { const stepCount = getValue(TRANSFER_BUFFER, 'i32'); predicates[i] = []; + textPredicates[i] = []; const steps = []; let stepAddress = predicatesAddress; @@ -756,7 +762,7 @@ class Language { if (steps[2].type === 'capture') { const captureName1 = steps[1].name; const captureName2 = steps[2].name; - predicates[i].push(function(captures) { + textPredicates[i].push(function(captures) { let node1, node2 for (const c of captures) { if (c.name === captureName1) node1 = c.node; @@ -767,7 
+773,7 @@ class Language { } else { const captureName = steps[1].name; const stringValue = steps[2].value; - predicates[i].push(function(captures) { + textPredicates[i].push(function(captures) { for (const c of captures) { if (c.name === captureName) { return (c.node.text === stringValue) === isPositive; @@ -790,7 +796,7 @@ class Language { ); const captureName = steps[1].name; const regex = new RegExp(steps[2].value); - predicates[i].push(function(captures) { + textPredicates[i].push(function(captures) { for (const c of captures) { if (c.name === captureName) return regex.test(c.node.text); } @@ -823,7 +829,7 @@ class Language { break; default: - throw new Error(`Unknown query predicate \`#${steps[0].value}\``); + predicates[i].push({operator, operands: steps.slice(1)}); } steps.length = 0; @@ -840,6 +846,7 @@ class Language { INTERNAL, address, captureNames, + textPredicates, predicates, Object.freeze(setProperties), Object.freeze(assertedProperties), @@ -888,12 +895,13 @@ class Language { class Query { constructor( - internal, address, captureNames, predicates, + internal, address, captureNames, textPredicates, predicates, setProperties, assertedProperties, refutedProperties ) { assertInternal(internal); this[0] = address; this.captureNames = captureNames; + this.textPredicates = textPredicates; this.predicates = predicates; this.setProperties = setProperties; this.assertedProperties = assertedProperties; @@ -902,6 +910,7 @@ class Query { delete() { C._ts_query_delete(this[0]); + this[0] = 0; } matches(node, startPosition, endPosition) { @@ -932,7 +941,7 @@ class Query { const captures = new Array(captureCount); address = unmarshalCaptures(this, node.tree, address, captures); - if (this.predicates[pattern].every(p => p(captures))) { + if (this.textPredicates[pattern].every(p => p(captures))) { result[i] = {pattern, captures}; const setProperties = this.setProperties[pattern]; if (setProperties) result[i].setProperties = setProperties; @@ -979,7 +988,7 @@ class 
Query { captures.length = captureCount address = unmarshalCaptures(this, node.tree, address, captures); - if (this.predicates[pattern].every(p => p(captures))) { + if (this.textPredicates[pattern].every(p => p(captures))) { const capture = captures[captureIndex]; const setProperties = this.setProperties[pattern]; if (setProperties) capture.setProperties = setProperties; @@ -994,6 +1003,10 @@ class Query { C._free(startAddress); return result; } + + predicatesForPattern(patternIndex) { + return this.predicates[patternIndex] + } } function getText(tree, startIndex, endIndex) { diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 8683214a04..9dda983489 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -45,9 +45,6 @@ describe("Query", () => { assert.throws(() => { JavaScript.query("((identifier) @a (eq? @a @a @a))"); }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 3"); - assert.throws(() => { - JavaScript.query("((identifier) @a (#something-else? @a))"); - }, "Unknown query predicate `#something-else?`"); }); }); @@ -207,6 +204,52 @@ describe("Query", () => { ]); }); }); + + describe(".predicatesForPattern(index)", () => { + it("returns all of the predicates as objects", () => { + query = JavaScript.query(` + ( + (binary_expression + left: (identifier) @a + right: (identifier) @b) + (#something? @a @b) + (#match? @a "c") + (#something-else? @a "A" @b "B") + ) + + ((identifier) @c + (#hello! 
@c)) + + "if" @d + `); + + assert.deepEqual(query.predicatesForPattern(0), [ + { + operator: "something?", + operands: [ + { type: "capture", name: "a" }, + { type: "capture", name: "b" }, + ], + }, + { + operator: "something-else?", + operands: [ + { type: "capture", name: "a" }, + { type: "string", value: "A" }, + { type: "capture", name: "b" }, + { type: "string", value: "B" }, + ], + }, + ]); + assert.deepEqual(query.predicatesForPattern(1), [ + { + operator: "hello!", + operands: [{ type: "capture", name: "c" }], + }, + ]); + assert.deepEqual(query.predicatesForPattern(2), []); + }); + }); }); function formatMatches(matches) { From c31afbb86abd416872dec0edf9782e1e4424b221 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 13:45:54 -0700 Subject: [PATCH 049/282] lib: 0.16.1 --- Cargo.lock | 8 ++++---- lib/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 378ee2986a..355debf5da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.16.0" +version = "0.16.1" dependencies = [ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -762,7 +762,7 @@ dependencies = [ "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.0", + "tree-sitter 0.16.1", "tree-sitter-highlight 0.2.0", "tree-sitter-tags 0.2.0", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -773,7 +773,7 @@ name = "tree-sitter-highlight" version = "0.2.0" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.0", + "tree-sitter 0.16.1", ] [[package]] @@ -782,7 +782,7 @@ version = "0.2.0" dependencies = [ "memchr 
2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.0", + "tree-sitter 0.16.1", ] [[package]] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 353ec8cec4..960ca2da43 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.16.0" +version = "0.16.1" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" From b57bd59ed4037a25970719d3480203d4f6b82827 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 13:47:28 -0700 Subject: [PATCH 050/282] web: 0.16.4 --- lib/binding_web/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 043394e4a6..9c93dac689 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.16.3", + "version": "0.16.4", "description": "Tree-sitter bindings for the web", "main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", From 9a82dcc666d06617cbab3061467075019fae0b0d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 1 Jun 2020 13:48:37 -0700 Subject: [PATCH 051/282] 0.16.8 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 355debf5da..cdad3b6167 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.7" +version = "0.16.8" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 76d8ede5af..0d85952f5f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name 
= "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.7" +version = "0.16.8" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index c03a57bd6b..738c5622d6 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.7", + "version": "0.16.8", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 81d533d2d1b580fdb507accabc91ceddffb5b6f0 Mon Sep 17 00:00:00 2001 From: Thomas Vigouroux <39092278+vigoux@users.noreply.github.com> Date: Wed, 3 Jun 2020 21:19:57 +0200 Subject: [PATCH 052/282] Fix compilation warnings (#635) * lib: fix compilation warnings * ci: add CFLAGS --- .travis.yml | 3 +++ Makefile | 2 +- lib/src/alloc.h | 1 + lib/src/parser.c | 4 +++- lib/src/query.c | 8 ++++---- lib/src/stack.c | 2 ++ 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98ca9ccf40..f19cdbfe28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: rust rust: - stable +env: + CFLAGS="-Wall -Wextra -Werror -Wstrict-prototypes" + matrix: include: - os: osx diff --git a/Makefile b/Makefile index 95f53b9abe..764f411a8c 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ endif OBJ := $(SRC:.c=.o) # define default flags, and override to append mandatory flags -CFLAGS ?= -O3 +CFLAGS ?= -O3 -Wall -Wextra -Werror override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include # ABI versioning diff --git a/lib/src/alloc.h b/lib/src/alloc.h index c8fe6c6e6d..9bbf751335 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -38,6 +38,7 @@ static inline void ts_free(void *buffer) { #include static inline bool ts_toggle_allocation_recording(bool value) { + (void)value; return false; } diff --git a/lib/src/parser.c b/lib/src/parser.c index 19add152f1..dd222cd3c4 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -101,9 +101,10 @@ 
typedef struct { static const char *ts_string_input_read( void *_self, uint32_t byte, - TSPoint _, + TSPoint pt, uint32_t *length ) { + (void)pt; TSStringInput *self = (TSStringInput *)_self; if (byte >= self->length) { *length = 0; @@ -210,6 +211,7 @@ static ErrorComparison ts_parser__compare_versions( ErrorStatus a, ErrorStatus b ) { + (void)self; if (!a.is_in_error && b.is_in_error) { if (a.cost < b.cost) { return ErrorComparisonTakeLeft; diff --git a/lib/src/query.c b/lib/src/query.c index 1999606689..59902dee3b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -259,7 +259,7 @@ static void stream_scan_identifier(Stream *stream) { * CaptureListPool ******************/ -static CaptureListPool capture_list_pool_new() { +static CaptureListPool capture_list_pool_new(void) { return (CaptureListPool) { .empty_list = array_new(), .usage_map = UINT32_MAX, @@ -315,7 +315,7 @@ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { * SymbolTable **************/ -static SymbolTable symbol_table_new() { +static SymbolTable symbol_table_new(void) { return (SymbolTable) { .characters = array_new(), .slices = array_new(), @@ -752,7 +752,7 @@ static TSQueryError ts_query__parse_pattern( array_push(&branch_step_indices, start_index); array_push(&self->steps, query_step__new(0, depth, false)); } - array_pop(&self->steps); + (void)array_pop(&self->steps); // For all of the branches except for the last one, add the subsequent branch as an // alternative, and link the end of the branch to the current end of the steps. 
@@ -1267,7 +1267,7 @@ void ts_query_disable_pattern( * QueryCursor ***************/ -TSQueryCursor *ts_query_cursor_new() { +TSQueryCursor *ts_query_cursor_new(void) { TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); *self = (TSQueryCursor) { .ascending = false, diff --git a/lib/src/stack.c b/lib/src/stack.c index ade1577566..6ceee2577f 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -480,6 +480,7 @@ StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t c } inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { + (void)payload; if (iterator->subtree_count >= 1) { if (iterator->is_pending) { return StackActionPop | StackActionStop; @@ -532,6 +533,7 @@ SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { } inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { + (void)payload; return iterator->node->link_count == 0 ? StackActionPop : StackActionNone; } From 0e5ff1497665df3551525cbe3f8b7a43f19f9683 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 4 Jun 2020 13:40:04 -0700 Subject: [PATCH 053/282] Requery the parse table when breaking down the parse stack on invalid lookahead (#636) * Requery parse table after breaking down parse stack due to invalid lookahead * Include Ruby parser in randomized test suite Ruby and PHP are our only two languages that use non-terminal extras. Adding Ruby uncovered some bugs. 
* Print edited source code when running parse --edit w/ debug flag * Recompute lookahead when breaking down stack on invalid lookahead * Fix stack summary leak when there are two discontinuities on a stack version --- cli/src/parse.rs | 10 +++++++++- cli/src/tests/corpus_test.rs | 1 + lib/src/parser.c | 5 +++++ lib/src/stack.c | 11 ++++++++++- 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d1ddb49924..13bac0f371 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -70,10 +70,18 @@ pub fn parse_file_at_path( let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - for edit in edits { + if debug_graph && !edits.is_empty() { + println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); + } + + for (i, edit) in edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; perform_edit(&mut tree, &mut source_code, &edit); tree = parser.parse(&source_code, Some(&tree)).unwrap(); + + if debug_graph { + println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + } } let duration = time.elapsed(); diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index e201d74310..732ff9ad19 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -24,6 +24,7 @@ const LANGUAGES: &'static [&'static str] = &[ "json", "php", "python", + "ruby", "rust", ]; diff --git a/lib/src/parser.c b/lib/src/parser.c index dd222cd3c4..e9c16f0cc6 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1339,6 +1339,7 @@ static bool ts_parser__advance( ); } +lex: // Otherwise, re-run the lexer. if (!lookahead.ptr) { lookahead = ts_parser__lex(self, version, state); @@ -1500,6 +1501,10 @@ static bool ts_parser__advance( // push each of its children. Then try again to process the current // lookahead. 
if (ts_parser__breakdown_top_of_stack(self, version)) { + state = ts_stack_state(self->stack, version); + ts_subtree_release(&self->tree_pool, lookahead); + lookahead = NULL_SUBTREE; + goto lex; continue; } diff --git a/lib/src/stack.c b/lib/src/stack.c index 6ceee2577f..6a8d897c37 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -571,7 +571,12 @@ void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_dep }; array_init(session.summary); stack__iter(self, version, summarize_stack_callback, &session, -1); - self->heads.contents[version].summary = session.summary; + StackHead *head = &self->heads.contents[version]; + if (head->summary) { + array_delete(head->summary); + ts_free(head->summary); + } + head->summary = session.summary; } StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { @@ -743,6 +748,10 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) ts_stack_error_cost(self, i) ); + if (head->summary) { + fprintf(f, "\nsummary_size: %u", head->summary->size); + } + if (head->last_external_token.ptr) { const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; const char *data = ts_external_scanner_state_data(state); From 9b9329cb6c169a03458d6bc1bb594fff76b206ca Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Sat, 6 Jun 2020 17:17:38 -0500 Subject: [PATCH 054/282] Fix some typos (#639) --- docs/section-3-creating-parsers.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 55ebc13d7c..c877ba6f7d 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -66,7 +66,7 @@ module.exports = grammar({ }); ``` -Then run the the following command: +Then run the following command: ```sh tree-sitter generate @@ -152,7 +152,7 @@ func x() int { These tests are important. 
They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. -By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the the `-f` flag: +By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag: ```sh tree-sitter test -f 'Return statements' @@ -208,7 +208,7 @@ The following is a complete list of built-in functions you can use in your `gram * **Precedence : `prec(number, rule)`** - This function marks the given rule with a numerical precedence which will be used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the [precedence directives][yacc-prec] in Yacc grammars. * **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars. * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. 
-* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. +* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. * **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. 
And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. @@ -344,7 +344,7 @@ Imagine that you were just starting work on the [Tree-sitter JavaScript parser][ return x + y; ``` -According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules: +According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules: ``` ReturnStatement -> 'return' Expression From 519a1369ce6292d28b7626590c78a5c0b055d2b0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 15 Jun 2020 10:51:34 -0700 Subject: [PATCH 055/282] In highlight test, reset included ranges before finding assertions --- cli/src/test_highlight.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 2011af4091..cf163c0589 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -168,6 +168,7 @@ pub fn parse_highlight_test( let mut assertion_ranges = Vec::new(); // Parse the code. 
+ parser.set_included_ranges(&[]).unwrap(); parser.set_language(language).unwrap(); let tree = parser.parse(source, None).unwrap(); From a6f71328fe153e01ad2e96151118b5cbb31a30ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 16 Jun 2020 09:22:34 -0700 Subject: [PATCH 056/282] Avoid whitelist/blacklist terminology in test comments --- cli/src/generate/nfa.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index bf9ca58d93..4cbfaaa325 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -761,7 +761,7 @@ mod tests { .add_range('d', 'e') ); - // A whitelist and an intersecting blacklist. + // An inclusion and an intersecting exclusion. // Both sets contain 'e', 'f', and 'm' let mut a = CharacterSet::empty() .add_range('c', 'h') @@ -791,7 +791,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - // A blacklist and an overlapping blacklist. + // An exclusion and an overlapping inclusion. // Both sets exclude 'c', 'd', and 'e' let mut a = CharacterSet::empty().add_range('a', 'e').negate(); let mut b = CharacterSet::empty().add_range('c', 'h').negate(); @@ -802,7 +802,7 @@ mod tests { assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); - // A blacklist and a larger blacklist. + // An exclusion and a larger exclusion. 
let mut a = CharacterSet::empty().add_range('b', 'c').negate(); let mut b = CharacterSet::empty().add_range('a', 'd').negate(); assert_eq!( From fa199e3a1a1f300e6acabe3546e92ba180167f65 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:02 -0700 Subject: [PATCH 057/282] Allow most tags to be arbitrarily named, remove hardcoded kinds --- tags/src/lib.rs | 104 ++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 69 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8d1853bb95..296ac9ba64 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,8 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{mem, str}; +use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -18,12 +19,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - call_capture_index: Option, - class_capture_index: Option, + capture_map: HashMap, doc_capture_index: Option, - function_capture_index: Option, - method_capture_index: Option, - module_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, local_definition_capture_index: Option, @@ -38,21 +35,13 @@ pub struct TagsContext { #[derive(Debug, Clone)] pub struct Tag { - pub kind: TagKind, pub range: Range, pub name_range: Range, pub line_range: Range, pub span: Range, pub docs: Option, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum TagKind { - Function, - Method, - Class, - Module, - Call, + pub is_definition: bool, + pub kind: String, } #[derive(Debug, PartialEq)] @@ -111,29 +100,23 @@ impl TagsConfiguration { } } - let mut call_capture_index = None; - let mut class_capture_index = None; + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; - let 
mut function_capture_index = None; - let mut method_capture_index = None; - let mut module_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { - let index = match name.as_str() { - "call" => &mut call_capture_index, - "class" => &mut class_capture_index, - "doc" => &mut doc_capture_index, - "function" => &mut function_capture_index, - "method" => &mut method_capture_index, - "module" => &mut module_capture_index, - "name" => &mut name_capture_index, - "local.scope" => &mut local_scope_capture_index, - "local.definition" => &mut local_definition_capture_index, - _ => continue, - }; - *index = Some(i as u32); + match name.as_str() { + "" => continue, + "name" => name_capture_index = Some(i as u32), + "doc" => doc_capture_index = Some(i as u32), + "local.scope" => local_scope_capture_index = Some(i as u32), + "local.definition" => local_definition_capture_index = Some(i as u32), + _ => { + capture_map.insert(i as u32, name.to_string()); + continue; + } + } } let pattern_info = (0..query.pattern_count()) @@ -180,12 +163,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, - function_capture_index, - class_capture_index, - method_capture_index, - module_capture_index, + capture_map, doc_capture_index, - call_capture_index, name_capture_index, tags_pattern_index, local_scope_capture_index, @@ -303,7 +282,8 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = TagKind::Call; + let mut kind = "unknown"; + let mut is_definition = false; let mut docs_adjacent_node = None; for capture in mat.captures { @@ -317,21 +297,18 @@ where name_range = Some(capture.node.byte_range()); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); - } else if index == self.config.call_capture_index { - tag_node = Some(capture.node); - kind = 
TagKind::Call; - } else if index == self.config.class_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Class; - } else if index == self.config.function_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Function; - } else if index == self.config.method_capture_index { - tag_node = Some(capture.node); - kind = TagKind::Method; - } else if index == self.config.module_capture_index { + } + + if let Some(name) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = TagKind::Module; + kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + } } } @@ -414,10 +391,11 @@ where *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }; } } @@ -427,10 +405,11 @@ where Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), span: tag_node.start_position()..tag_node.end_position(), - kind, range, name_range, docs, + kind: kind.to_string(), + is_definition, }, mat.pattern_index, ), @@ -448,19 +427,6 @@ where } } -impl fmt::Display for TagKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TagKind::Call => "Call", - TagKind::Module => "Module", - TagKind::Class => "Class", - TagKind::Method => "Method", - TagKind::Function => "Function", - } - .fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 8d7459ed578b8f66bde36624c3f91e40d54d79a2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:13 -0700 Subject: [PATCH 058/282] Bring c_lib inline --- tags/src/c_lib.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 
0c36797798..c8ca8ed539 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagKind, TagsConfiguration, TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,16 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSTagKind { - Function, - Method, - Class, - Module, - Call, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +27,6 @@ pub struct TSPoint { #[repr(C)] pub struct TSTag { - pub kind: TSTagKind, pub start_byte: u32, pub end_byte: u32, pub name_start_byte: u32, @@ -48,6 +37,8 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, + pub kind: String, + pub is_definition: bool, } pub struct TSTagger { @@ -153,13 +144,6 @@ pub extern "C" fn ts_tagger_tag( buffer.docs.extend_from_slice(docs.as_bytes()); } buffer.tags.push(TSTag { - kind: match tag.kind { - TagKind::Function => TSTagKind::Function, - TagKind::Method => TSTagKind::Method, - TagKind::Class => TSTagKind::Class, - TagKind::Module => TSTagKind::Module, - TagKind::Call => TSTagKind::Call, - }, start_byte: tag.range.start as u32, end_byte: tag.range.end as u32, name_start_byte: tag.name_range.start as u32, @@ -176,6 +160,8 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, + kind: tag.kind, + is_definition: tag.is_definition, }); } From 9bf4939b9a1093f6c42d0bdcf268fef8a4e04d8f Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 16:04:22 -0700 Subject: [PATCH 059/282] Show if tag is a def/ref in the cli --- cli/src/tags.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index d6704ec52a..6308d396e9 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -42,9 +42,10 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: 
&[String]) -> let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t{:>9}-{:<9}", + " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.kind, str::from_utf8(&source[tag.name_range]).unwrap_or(""), + if tag.is_definition { "definition" } else { "reference" }, tag.span.start, tag.span.end, )?; From d802b3779145d833dc16e3e075f8e34dd684504a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:09:34 -0700 Subject: [PATCH 060/282] Bring back a SyntaxType enum --- cli/src/tags.rs | 2 +- tags/src/c_lib.rs | 28 ++++++++++++-- tags/src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 105 insertions(+), 23 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 6308d396e9..06f4f4fa9f 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -43,7 +43,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> write!( &mut stdout, " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.kind, + tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), if tag.is_definition { "definition" } else { "reference" }, tag.span.start, diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c8ca8ed539..72c708d0f0 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, TagsConfiguration, TagsContext}; +use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,6 +19,19 @@ pub enum TSTagsError { Unknown, } +#[repr(C)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TSSyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + #[repr(C)] pub struct TSPoint { row: u32, @@ -37,7 +50,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub kind: String, + pub syntax_type: TSSyntaxType, pub is_definition: bool, } @@ -160,7 +173,16 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, 
docs_end_byte: buffer.docs.len() as u32, - kind: tag.kind, + syntax_type: match tag.syntax_type { + SyntaxType::Function => TSSyntaxType::Function, + SyntaxType::Method => TSSyntaxType::Method, + SyntaxType::Class => TSSyntaxType::Class, + SyntaxType::Module => TSSyntaxType::Module, + SyntaxType::Call => TSSyntaxType::Call, + SyntaxType::Type => TSSyntaxType::Type, + SyntaxType::Interface => TSSyntaxType::Interface, + SyntaxType::Implementation => TSSyntaxType::Implementation, + }, is_definition: tag.is_definition, }); } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 296ac9ba64..e6179b8b18 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -4,7 +4,7 @@ use memchr::{memchr, memrchr}; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{mem, str}; +use std::{fmt, mem, str}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,7 +19,7 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, - capture_map: HashMap, + capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, local_scope_capture_index: Option, @@ -28,6 +28,27 @@ pub struct TagsConfiguration { pattern_info: Vec, } + +#[derive(Debug)] +pub struct NamedCapture { + pub syntax_type: SyntaxType, + pub is_definition: bool, +} + +// Should stay in sync with list of valid syntax types in semantic. 
+// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SyntaxType { + Function, + Method, + Class, + Module, + Call, + Type, + Interface, + Implementation, +} + pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -41,7 +62,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub kind: String, + pub syntax_type: SyntaxType, } #[derive(Debug, PartialEq)] @@ -100,7 +121,7 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map: HashMap = HashMap::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; @@ -112,9 +133,8 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => { - capture_map.insert(i as u32, name.to_string()); - continue; + _ => if let Some(nc) = NamedCapture::new(name) { + capture_map.insert(i as u32, nc); } } } @@ -282,7 +302,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut kind = "unknown"; + let mut syntax_type = SyntaxType::Function; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -299,16 +319,18 @@ where doc_nodes.push(capture.node); } - if let Some(name) = self.config.capture_map.get(&capture.index) { + if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - } + syntax_type = named_capture.syntax_type; + is_definition = named_capture.is_definition; + // kind = if 
name.starts_with("definition.") { + // is_definition = true; + // name.trim_start_matches("definition.") + // } else if name.starts_with("reference.") { + // name.trim_start_matches("reference.") + // } else { + // name + // } } } @@ -394,7 +416,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }; } @@ -408,7 +430,7 @@ where range, name_range, docs, - kind: kind.to_string(), + syntax_type, is_definition, }, mat.pattern_index, @@ -427,6 +449,44 @@ where } } +impl NamedCapture { + pub fn new(name: &String) -> Option { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + name + }; + + let syntax_type = match kind.as_ref() { + "function" => {is_definition = true; SyntaxType::Function}, + "method" => {is_definition = true; SyntaxType::Method}, + "class" => SyntaxType::Class, + "module" => SyntaxType::Module, + "call" => SyntaxType::Call, + "type" => SyntaxType::Type, + "interface" => SyntaxType::Interface, + "implementation" => SyntaxType::Implementation, + _ => return None, + }; + + return Some(NamedCapture{ + syntax_type, + is_definition + }) + } +} + +impl fmt::Display for SyntaxType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 80f5c522594de99d487aa12a756f369ae48372a3 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 16 Jun 2020 17:19:35 -0700 Subject: [PATCH 061/282] Tests compile --- cli/src/tests/tags_test.rs | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd866..b628350738 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -3,7 +3,7 @@ use 
super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -99,12 +99,12 @@ fn test_tags_python() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("age", TagKind::Function), - ("compute_age", TagKind::Call), + ("Customer", SyntaxType::Class), + ("age", SyntaxType::Function), + ("compute_age", SyntaxType::Call), ] ); @@ -150,12 +150,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.syntax_type)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ("Customer", SyntaxType::Class), + ("getAge", SyntaxType::Method), + ("Agent", SyntaxType::Class) ] ); assert_eq!( @@ -204,18 +204,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.kind, + t.syntax_type, (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), - ("bar", TagKind::Call, (7, 4)), - ("a", TagKind::Call, (7, 8)), - ("b", TagKind::Call, (7, 11)), - ("each", TagKind::Call, (9, 14)), - ("baz", TagKind::Call, (13, 8)), - ("b", TagKind::Call, (13, 15),), + ("foo", SyntaxType::Method, (2, 0)), + ("bar", SyntaxType::Call, (7, 4)), + ("a", SyntaxType::Call, (7, 8)), + ("b", SyntaxType::Call, (7, 11)), + ("each", SyntaxType::Call, (9, 14)), + ("baz", SyntaxType::Call, (13, 8)), + ("b", SyntaxType::Call, (13, 15),), ] ); } @@ -319,7 +319,7 @@ fn test_tags_via_c_api() { assert_eq!( tags.iter() .map(|tag| ( - tag.kind, + tag.syntax_type, 
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +327,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSTagKind::Function, + c::TSSyntaxType::Function, "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSTagKind::Class, + c::TSSyntaxType::Class, "C", "class C extends D {", "four\nfive" ), - (c::TSTagKind::Call, "b", "b(a);", "") + (c::TSSyntaxType::Call, "b", "b(a);", "") ] ); From 929bb40adcb3678b3a229a272222bd3edab62ecf Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:34:55 -0700 Subject: [PATCH 062/282] Shorten to def/ref --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 06f4f4fa9f..4869b8cc5e 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -45,7 +45,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> " {:<8} {:<40}\t [{}] {:>9}-{:<9}", tag.syntax_type, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - if tag.is_definition { "definition" } else { "reference" }, + if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, )?; From c08333e0cdbf0cb47253abe1eb856f3f80e4a9ea Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:07 -0700 Subject: [PATCH 063/282] Defer to debug formatting take 2 --- tags/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index e6179b8b18..dd74f833d1 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -483,7 +483,7 @@ impl NamedCapture { impl fmt::Display for SyntaxType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) + format!("{:?}", self).fmt(f) } } From 3e8bf9daceb19c64cf3e84530d62594729000d1a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 10:35:16 -0700 Subject: 
[PATCH 064/282] These are always definitions --- tags/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dd74f833d1..991d3cb55c 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -465,8 +465,8 @@ impl NamedCapture { let syntax_type = match kind.as_ref() { "function" => {is_definition = true; SyntaxType::Function}, "method" => {is_definition = true; SyntaxType::Method}, - "class" => SyntaxType::Class, - "module" => SyntaxType::Module, + "class" => {is_definition = true; SyntaxType::Class}, + "module" => {is_definition = true; SyntaxType::Module}, "call" => SyntaxType::Call, "type" => SyntaxType::Type, "interface" => SyntaxType::Interface, From 30132c682b22b57d7f42883f2cb8480691182551 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 14:12:14 -0700 Subject: [PATCH 065/282] Bring tags.h inline --- tags/include/tree_sitter/tags.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 946dc6f150..e1ed68bdf9 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -19,15 +19,17 @@ typedef enum { } TSTagsError; typedef enum { - TSTagKindFunction, - TSTagKindMethod, - TSTagKindClass, - TSTagKindModule, - TSTagKindCall, -} TSTagKind; + TSSyntaxTypeFunction, + TSSyntaxTypeMethod, + TSSyntaxTypeClass, + TSSyntaxTypeModule, + TSSyntaxTypeCall, + TSSyntaxTypeType, + TSSyntaxTypeInterface, + TSSyntaxTypeImplementation, +} TSTagSyntaxType; typedef struct { - TSTagKind kind; uint32_t start_byte; uint32_t end_byte; uint32_t name_start_byte; @@ -38,6 +40,8 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; + TSTagSyntaxType syntax_type; + bool is_definition; } TSTag; typedef struct TSTagger TSTagger; From 15202d0b382a083ffa7d3019eec9348c5c35c7d9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:31 -0700 
Subject: [PATCH 066/282] Remove commented code --- tags/src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 991d3cb55c..8cd7345767 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -323,14 +323,6 @@ where tag_node = Some(capture.node); syntax_type = named_capture.syntax_type; is_definition = named_capture.is_definition; - // kind = if name.starts_with("definition.") { - // is_definition = true; - // name.trim_start_matches("definition.") - // } else if name.starts_with("reference.") { - // name.trim_start_matches("reference.") - // } else { - // name - // } } } From 3c39b016a4c538d645a7e0f5bdfd476e4588afd9 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:11:42 -0700 Subject: [PATCH 067/282] Trim whitespace from tag source lines --- tags/src/lib.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 8cd7345767..32eaa0d92d 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -495,7 +495,16 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end + trim_start(text, start..end) +} + +fn trim_start(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().enumerate() { + if !c.is_ascii_whitespace(){ + return index..r.end + } + } + return r } #[cfg(test)] @@ -514,4 +523,13 @@ mod tests { assert_eq!(line_range(text, 5, 10), 4..8); assert_eq!(line_range(text, 11, 10), 9..14); } + + #[test] + fn test_get_line_trims() { + let text = b" foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 3..6); + + let text = b"\t func foo\nbar\n"; + assert_eq!(line_range(text, 0, 10), 2..10); + } } From 7b2514a6108593f9da31b4bb6638a145bfa77b51 Mon Sep 17 00:00:00 2001 
From: Timothy Clem Date: Wed, 17 Jun 2020 15:12:16 -0700 Subject: [PATCH 068/282] Whitespace --- tags/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 32eaa0d92d..d0746b3dc2 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -28,7 +28,6 @@ pub struct TagsConfiguration { pattern_info: Vec, } - #[derive(Debug)] pub struct NamedCapture { pub syntax_type: SyntaxType, From 819b800cf973418c7dbd73e628ae26401d618580 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:29 -0700 Subject: [PATCH 069/282] Pick up the proper initial index and test --- tags/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d0746b3dc2..d57e3fb52b 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -500,7 +500,7 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { if !c.is_ascii_whitespace(){ - return index..r.end + return (r.start+index)..r.end } } return r @@ -530,5 +530,6 @@ mod tests { let text = b"\t func foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); + assert_eq!(line_range(text, 11, 10), 11..14); } } From f24a952cb48706cf3134ad8da505462098b65348 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 17 Jun 2020 15:54:36 -0700 Subject: [PATCH 070/282] Minor output changes --- cli/src/tags.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 4869b8cc5e..3493f61681 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -34,20 +34,27 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? 
{ - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + let ident = if paths.len() > 1 { + let path_str = format!("{:?}", path); + writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + "\t" + } else { + "" + }; let source = fs::read(path)?; for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { let tag = tag?; write!( &mut stdout, - " {:<8} {:<40}\t [{}] {:>9}-{:<9}", - tag.syntax_type, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), + tag.syntax_type, if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), )?; if let Some(docs) = tag.docs { if docs.len() > 120 { From bfeec63d60e64ecbf8d6cbb324fd3f3d32ee2295 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Jun 2020 07:12:33 -0700 Subject: [PATCH 071/282] Remove docs site banner for now --- docs/index.html | 128 ------------------------------------------------ 1 file changed, 128 deletions(-) delete mode 100644 docs/index.html diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 91082812f3..0000000000 --- a/docs/index.html +++ /dev/null @@ -1,128 +0,0 @@ - - - - - - Black Lives Matter - - - - - - - -
-
-

Black Lives Matter

-

We stand in solidarity with George Floyd, Natosha McDade, Yassin Mohamed, Finan H. Berhe, Sean Reed, Steven Demarco Taylor, Breonna Taylor, Ariane McCree, Terrance Franklin, Miles Hall, Darius Tarver, William Green, Samuel David Mallard, Kwame Jones, De’von Bailey, Christopher Whitfield, Anthony Hill, De’Von Bailey, Eric Logan, Jamarion Robinson, Gregory Hill Jr, JaQuavion Slaton, Ryan Twyman, Brandon Webber, Jimmy Atchison, Willie McCoy, Emantic Fitzgerald Bradford J, D’ettrick Griffin, Jemel Roberson, DeAndre Ballard, Botham Shem Jean, Robert Lawrence White, Anthony Lamar Smith, Ramarley Graham, Manuel Loggins Jr, Trayvon Martin, Wendell Allen, Kendrec McDade, Larry Jackson Jr, Jonathan Ferrell, Jordan Baker, Victor White III, Dontre Hamilton, Eric Garner, John Crawford III, Michael Brown, Ezell Ford, Dante Parker, Kajieme Powell, Laquan McDonald, Akai Gurley, Tamir Rice, Rumain Brisbon, Jerame Reid, Charly Keunang, Tony Robinson, Walter Scott, Freddie Gray, Brendon Glenn, Samuel DuBose, Christian Taylor, Jamar Clark, Mario Woods, Quintonio LeGrier, Gregory Gunn, Akiel Denkins, Alton Sterling, Philando Castile, Terrence Sterling, Terence Crutcher, Keith Lamont Scott, Alfred Olango, Jordan Edwards, Stephon Clark, Danny Ray Thomas, DeJuan Guillory, Patrick Harmon, Jonathan Hart, Maurice Granton, Julius Johnson, Jamee Johnson, Michael Dean...

-
- - -
- - From 016ad53a2f4f5a79ef4164eaf57a13e5147eb53a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 07:40:48 -0700 Subject: [PATCH 072/282] Trim end of lines as well --- tags/src/lib.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index d57e3fb52b..1959c75336 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -494,18 +494,27 @@ fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); let max_line_len = max_line_len.min(text.len() - start); let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - trim_start(text, start..end) + trim_end(text, trim_start(text, start..end)) } fn trim_start(text: &[u8], r: Range) -> Range { for (index, c) in text[r.start..r.end].iter().enumerate() { - if !c.is_ascii_whitespace(){ + if !c.is_ascii_whitespace() { return (r.start+index)..r.end } } return r } +fn trim_end(text: &[u8], r: Range) -> Range { + for (index, c) in text[r.start..r.end].iter().rev().enumerate() { + if !c.is_ascii_whitespace() { + return r.start..(r.end-index) + } + } + return r +} + #[cfg(test)] mod tests { use super::*; @@ -528,8 +537,15 @@ mod tests { let text = b" foo\nbar\n"; assert_eq!(line_range(text, 0, 10), 3..6); - let text = b"\t func foo\nbar\n"; + let text = b"\t func foo \nbar\n"; assert_eq!(line_range(text, 0, 10), 2..10); - assert_eq!(line_range(text, 11, 10), 11..14); + + let r = line_range(text, 0, 14); + assert_eq!(r, 2..10); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo"); + + let r = line_range(text, 12, 14); + assert_eq!(r, 12..15); + assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar"); } } From 3bcb1f8c9405f77242a0c2f46dabfe4c8e59b53d Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 10:48:33 -0700 Subject: [PATCH 073/282] Assert line trimming --- cli/src/tests/tags_test.rs | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index b628350738..02d06ff613 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -108,10 +108,10 @@ fn test_tags_python() { ] ); - assert_eq!(substr(source, &tags[0].line_range), " class Customer:"); + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); assert_eq!( substr(source, &tags[1].line_range), - " def age(self):" + "def age(self):" ); assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); From 54586c4e5bf5536bf075558b0529f4518f348676 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:30 -0700 Subject: [PATCH 074/282] Named captures are dynamic New c api for getting list of syntax_type names. --- tags/include/tree_sitter/tags.h | 16 ++--- tags/src/c_lib.rs | 46 +++++++------- tags/src/lib.rs | 105 +++++++++++++------------------- 3 files changed, 69 insertions(+), 98 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index e1ed68bdf9..f6113a0f5a 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -18,17 +18,6 @@ typedef enum { TSTagsInvalidQuery, } TSTagsError; -typedef enum { - TSSyntaxTypeFunction, - TSSyntaxTypeMethod, - TSSyntaxTypeClass, - TSSyntaxTypeModule, - TSSyntaxTypeCall, - TSSyntaxTypeType, - TSSyntaxTypeInterface, - TSSyntaxTypeImplementation, -} TSTagSyntaxType; - typedef struct { uint32_t start_byte; uint32_t end_byte; @@ -40,7 +29,7 @@ typedef struct { TSPoint end_point; uint32_t docs_start_byte; uint32_t docs_end_byte; - TSTagSyntaxType syntax_type; + uint32_t syntax_type_id; bool is_definition; } TSTag; @@ -93,6 +82,9 @@ uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *); const char *ts_tags_buffer_docs(const TSTagsBuffer *); uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); +// Get the syntax kinds 
for a scope. +const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); + #ifdef __cplusplus } #endif diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 72c708d0f0..6dc4819519 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,4 +1,4 @@ -use super::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; use std::process::abort; @@ -19,19 +19,6 @@ pub enum TSTagsError { Unknown, } -#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TSSyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - #[repr(C)] pub struct TSPoint { row: u32, @@ -50,7 +37,7 @@ pub struct TSTag { pub end_point: TSPoint, pub docs_start_byte: u32, pub docs_end_byte: u32, - pub syntax_type: TSSyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } @@ -173,16 +160,7 @@ pub extern "C" fn ts_tagger_tag( }, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, - syntax_type: match tag.syntax_type { - SyntaxType::Function => TSSyntaxType::Function, - SyntaxType::Method => TSSyntaxType::Method, - SyntaxType::Class => TSSyntaxType::Class, - SyntaxType::Module => TSSyntaxType::Module, - SyntaxType::Call => TSSyntaxType::Call, - SyntaxType::Type => TSSyntaxType::Type, - SyntaxType::Interface => TSSyntaxType::Interface, - SyntaxType::Implementation => TSSyntaxType::Implementation, - }, + syntax_type_id: tag.syntax_type_id, is_definition: tag.is_definition, }); } @@ -231,6 +209,24 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { buffer.docs.len() as u32 } +#[no_mangle] +pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( + this: *mut TSTagger, + scope_name: *const i8, + len: *mut u32, +) -> *const *const i8 { + let tagger = unwrap_mut_ptr(this); + let scope_name = unsafe { 
unwrap(CStr::from_ptr(scope_name).to_str()) }; + let len = unwrap_mut_ptr(len); + + *len = 0; + if let Some(config) = tagger.languages.get(scope_name) { + *len = config.c_syntax_type_names.len() as u32; + return config.c_syntax_type_names.as_ptr() as *const *const i8 + } + std::ptr::null() +} + fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { unsafe { result.as_ref() }.unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 1959c75336..3d5ce770a0 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,6 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; +use std::ffi::CStr; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -19,6 +20,8 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; pub struct TagsConfiguration { pub language: Language, pub query: Query, + syntax_type_names: Vec>, + c_syntax_type_names: Vec<*const u8>, capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, @@ -30,24 +33,10 @@ pub struct TagsConfiguration { #[derive(Debug)] pub struct NamedCapture { - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, pub is_definition: bool, } -// Should stay in sync with list of valid syntax types in semantic. 
-// See: https://github.com/github/semantic/blob/621696f5bc523a651f1cf9fc2ac58c557ea02d07/proto/semantic.proto#L165-L174 -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum SyntaxType { - Function, - Method, - Class, - Module, - Call, - Type, - Interface, - Implementation, -} - pub struct TagsContext { parser: Parser, cursor: QueryCursor, @@ -61,7 +50,7 @@ pub struct Tag { pub span: Range, pub docs: Option, pub is_definition: bool, - pub syntax_type: SyntaxType, + pub syntax_type_id: u32, } #[derive(Debug, PartialEq)] @@ -70,6 +59,7 @@ pub enum Error { Regex(regex::Error), Cancelled, InvalidLanguage, + InvalidCapture(String), } #[derive(Debug, Default)] @@ -120,11 +110,13 @@ impl TagsConfiguration { } } - let mut capture_map: HashMap = HashMap::new(); + let mut capture_map = HashMap::new(); + let mut syntax_type_names = Vec::new(); let mut doc_capture_index = None; let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; + let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -132,12 +124,32 @@ impl TagsConfiguration { "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - _ => if let Some(nc) = NamedCapture::new(name) { - capture_map.insert(i as u32, nc); + "local.reference" => continue, + _ => { + let mut is_definition = false; + + let kind = if name.starts_with("definition.") { + is_definition = true; + name.trim_start_matches("definition.") + } else if name.starts_with("reference.") { + name.trim_start_matches("reference.") + } else { + return Err(Error::InvalidCapture(name.to_string())) + }.to_string()+"\0"; + + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); + syntax_type_id+=1; + if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { + 
syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + } } } } + let c_syntax_type_names = syntax_type_names.iter().map( |s| { + s.as_ptr() + }).collect(); + let pattern_info = (0..query.pattern_count()) .map(|pattern_index| { let mut info = PatternInfo::default(); @@ -182,6 +194,8 @@ impl TagsConfiguration { Ok(TagsConfiguration { language, query, + syntax_type_names, + c_syntax_type_names, capture_map, doc_capture_index, name_capture_index, @@ -191,6 +205,13 @@ impl TagsConfiguration { pattern_info, }) } + + pub fn syntax_type_name(&self, id: u32) -> &str { + unsafe { + let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8).to_bytes(); + str::from_utf8(cstr).expect("syntax type name was not valid utf-8") + } + } } impl TagsContext { @@ -301,7 +322,7 @@ where let mut name_range = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; - let mut syntax_type = SyntaxType::Function; + let mut syntax_type_id = 0; let mut is_definition = false; let mut docs_adjacent_node = None; @@ -320,7 +341,7 @@ where if let Some(named_capture) = self.config.capture_map.get(&capture.index) { tag_node = Some(capture.node); - syntax_type = named_capture.syntax_type; + syntax_type_id = named_capture.syntax_type_id; is_definition = named_capture.is_definition; } } @@ -407,7 +428,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }; } @@ -421,7 +442,7 @@ where range, name_range, docs, - syntax_type, + syntax_type_id, is_definition, }, mat.pattern_index, @@ -440,44 +461,6 @@ where } } -impl NamedCapture { - pub fn new(name: &String) -> Option { - let mut is_definition = false; - - let kind = if name.starts_with("definition.") { - is_definition = true; - name.trim_start_matches("definition.") - } else if name.starts_with("reference.") { - name.trim_start_matches("reference.") - } else { - name - }; - - let syntax_type = match kind.as_ref() { - "function" => {is_definition = true; 
SyntaxType::Function}, - "method" => {is_definition = true; SyntaxType::Method}, - "class" => {is_definition = true; SyntaxType::Class}, - "module" => {is_definition = true; SyntaxType::Module}, - "call" => SyntaxType::Call, - "type" => SyntaxType::Type, - "interface" => SyntaxType::Interface, - "implementation" => SyntaxType::Implementation, - _ => return None, - }; - - return Some(NamedCapture{ - syntax_type, - is_definition - }) - } -} - -impl fmt::Display for SyntaxType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - format!("{:?}", self).fmt(f) - } -} - impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 75724698f0b668b6511b8dcf4bf718733abfffb5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:42:41 -0700 Subject: [PATCH 075/282] Fix up tests --- cli/src/tests/tags_test.rs | 60 +++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 02d06ff613..cc339e0af7 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,8 +2,9 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use std::ffi::CStr; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, SyntaxType, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( @@ -97,14 +98,15 @@ fn test_tags_python() { .collect::, _>>() .unwrap(); + assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("age", SyntaxType::Function), - ("compute_age", SyntaxType::Call), + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), ] ); @@ 
-150,12 +152,12 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.syntax_type)) + .map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id))) .collect::>(), &[ - ("Customer", SyntaxType::Class), - ("getAge", SyntaxType::Method), - ("Agent", SyntaxType::Class) + ("Customer", "class"), + ("getAge", "method"), + ("Agent", "class") ] ); assert_eq!( @@ -204,18 +206,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.syntax_type, + tags_config.syntax_type_name(t.syntax_type_id), (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", SyntaxType::Method, (2, 0)), - ("bar", SyntaxType::Call, (7, 4)), - ("a", SyntaxType::Call, (7, 8)), - ("b", SyntaxType::Call, (7, 11)), - ("each", SyntaxType::Call, (9, 14)), - ("baz", SyntaxType::Call, (13, 8)), - ("b", SyntaxType::Call, (13, 15),), + ("foo", "method", (2, 0)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), ] ); } @@ -253,6 +255,14 @@ fn test_tags_cancellation() { }); } +#[test] +fn test_invalid_cpature() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + #[test] fn test_tags_via_c_api() { allocations::record(|| { @@ -316,10 +326,18 @@ fn test_tags_via_c_api() { }) .unwrap(); + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize).iter().map(|i| { + CStr::from_ptr(*i).to_str().unwrap() + }).collect() + }; + assert_eq!( tags.iter() .map(|tag| ( - tag.syntax_type, + syntax_types[tag.syntax_type_id as usize], &source_code[tag.name_start_byte as 
usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], @@ -327,18 +345,18 @@ fn test_tags_via_c_api() { .collect::>(), &[ ( - c::TSSyntaxType::Function, + "function", "b", "function b() {", "one\ntwo\nthree" ), ( - c::TSSyntaxType::Class, + "class", "C", "class C extends D {", "four\nfive" ), - (c::TSSyntaxType::Call, "b", "b(a);", "") + ("call", "b", "b(a);", "") ] ); From b6ae67a6100a7c1fa6a249a2b4e0ff04378a41b5 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:10 -0700 Subject: [PATCH 076/282] Fix up CLI, use new syntax_type_name --- cli/src/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 3493f61681..515f4c5264 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -50,7 +50,7 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", ident, str::from_utf8(&source[tag.name_range]).unwrap_or(""), - tag.syntax_type, + &tags_config.syntax_type_name(tag.syntax_type_id), if tag.is_definition { "def" } else { "ref" }, tag.span.start, tag.span.end, From 17d26c0d5a5d2b836a0b5f77414c007572589b97 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 14:43:27 -0700 Subject: [PATCH 077/282] Improved errors --- cli/src/error.rs | 2 +- tags/src/lib.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 824bd92fab..d583d1b93b 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -83,7 +83,7 @@ impl<'a> From for Error { impl<'a> From for Error { fn from(error: tree_sitter_tags::Error) -> Self { - Error::new(format!("{:?}", error)) + Error::new(format!("{}", error)) } } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 3d5ce770a0..07fed3afbf 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -62,6 +62,15 @@ pub enum Error { 
InvalidCapture(String), } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, From ef15f4df24af34f685eefc630b2af69b1ee661b2 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:08 -0700 Subject: [PATCH 078/282] Dedupe items in syntax_type_names --- tags/src/lib.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 07fed3afbf..128a01cf02 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,7 +5,7 @@ use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{fmt, mem, str}; -use std::ffi::CStr; +use std::ffi::{CStr, CString}; use std::collections::HashMap; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -125,7 +125,6 @@ impl TagsConfiguration { let mut name_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; - let mut syntax_type_id = 0; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, @@ -144,12 +143,15 @@ impl TagsConfiguration { name.trim_start_matches("reference.") } else { return Err(Error::InvalidCapture(name.to_string())) - }.to_string()+"\0"; - - capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); - syntax_type_id+=1; - if let Ok(cstr) = CStr::from_bytes_with_nul(kind.as_bytes()) { - syntax_type_names.push(cstr.to_bytes_with_nul().to_vec().into_boxed_slice()); + }; + + if let Ok(cstr) = CString::new(kind) { + let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice(); + let syntax_type_id = 
syntax_type_names.iter().position(|n| { n == &c_kind }).unwrap_or_else(|| { + syntax_type_names.push(c_kind); + syntax_type_names.len() - 1 + }) as u32; + capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition }); } } } From f166947abb3fa834463dfb21b0044d30b0617795 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 15:05:27 -0700 Subject: [PATCH 079/282] Test updates, definition/reference prefix is now required --- cli/src/tests/tags_test.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index cc339e0af7..540e2b0138 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -10,65 +10,65 @@ const PYTHON_TAG_QUERY: &'static str = r#" ( (function_definition name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function + body: (block . (expression_statement (string) @doc))) @definition.function (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition - name: (identifier) @name) @function + name: (identifier) @name) @definition.function ( (class_definition name: (identifier) @name body: (block - . (expression_statement (string) @doc))) @class + . (expression_statement (string) @doc))) @definition.class (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition - name: (identifier) @name) @class + name: (identifier) @name) @definition.class (call - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const JS_TAG_QUERY: &'static str = r#" ( (comment)* @doc . (class_declaration - name: (identifier) @name) @class - (#select-adjacent! @doc @class) + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (method_definition - name: (property_identifier) @name) @method - (#select-adjacent! 
@doc @method) + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) ( (comment)* @doc . (function_declaration - name: (identifier) @name) @function - (#select-adjacent! @doc @function) + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") ) (call_expression - function: (identifier) @name) @call + function: (identifier) @name) @reference.call "#; const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @method + name: (identifier) @name) @definition.method (method_call - method: (identifier) @name) @call + method: (identifier) @name) @reference.call -((identifier) @name @call +((identifier) @name @reference.call (#is-not? local)) "#; @@ -256,7 +256,7 @@ fn test_tags_cancellation() { } #[test] -fn test_invalid_cpature() { +fn test_invalid_capture() { let language = get_language("python"); let e = TagsConfiguration::new(language, "(identifier) @method", "") .expect_err("expected InvalidCapture error"); From d9d3da994218339e525925b6cfda81247a22c001 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Thu, 18 Jun 2020 16:04:05 -0700 Subject: [PATCH 080/282] Fill out rest of c errors --- tags/include/tree_sitter/tags.h | 1 + tags/src/c_lib.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index f6113a0f5a..58f5bbd9a1 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -16,6 +16,7 @@ typedef enum { TSTagsInvalidUtf8, TSTagsInvalidRegex, TSTagsInvalidQuery, + TSTagsInvalidCapture, } TSTagsError; typedef struct { diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 6dc4819519..77f8aae5c0 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -16,6 +16,7 @@ pub enum TSTagsError { InvalidUtf8, InvalidRegex, InvalidQuery, + InvalidCapture, Unknown, } 
@@ -93,7 +94,9 @@ pub extern "C" fn ts_tagger_add_language( } Err(Error::Query(_)) => TSTagsError::InvalidQuery, Err(Error::Regex(_)) => TSTagsError::InvalidRegex, - Err(_) => TSTagsError::Unknown, + Err(Error::Cancelled) => TSTagsError::Timeout, + Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage, + Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture, } } From deeeb67a3b20043e05b7197022aa285fa6b1b58c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Jun 2020 14:20:56 -0700 Subject: [PATCH 081/282] query: Fix handling of alternations under field names (#661) --- cli/src/tests/query_test.rs | 35 +++++++++++++++++++++++++++++++++++ lib/src/query.c | 27 +++++++++++++++++++++------ 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5499048e2c..d4f18c7ddb 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1008,6 +1008,41 @@ fn test_query_matches_with_alternatives_at_root() { }) } +#[test] +fn test_query_matches_with_alternatives_under_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (assignment_expression + left: [ + (identifier) @variable + (member_expression property: (property_identifier) @variable) + ]) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b; + b = c.d; + e.f = g; + h.i = j.k; + ", + &[ + (0, vec![("variable", "a")]), + (0, vec![("variable", "b")]), + (0, vec![("variable", "f")]), + (0, vec![("variable", "i")]), + ], + ); + }); +} + #[test] fn test_query_matches_in_language_with_simple_aliases() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 59902dee3b..ff243494a2 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -715,7 +715,7 @@ static TSQueryError ts_query__parse_pattern( uint32_t *capture_count, bool is_immediate ) { - uint32_t starting_step_index = self->steps.size; + const 
uint32_t starting_step_index = self->steps.size; if (stream->next == 0) return TSQueryErrorSyntax; @@ -951,7 +951,6 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse the pattern - uint32_t step_index = self->steps.size; TSQueryError e = ts_query__parse_pattern( self, stream, @@ -972,7 +971,22 @@ static TSQueryError ts_query__parse_pattern( stream->input = field_name; return TSQueryErrorField; } - self->steps.contents[step_index].field = field_id; + + uint32_t step_index = starting_step_index; + QueryStep *step = &self->steps.contents[step_index]; + for (;;) { + step->field = field_id; + if ( + step->alternative_index != NONE && + step->alternative_index > step_index && + step->alternative_index < self->steps.size + ) { + step_index = step->alternative_index; + step = &self->steps.contents[step_index]; + } else { + break; + } + } } else { @@ -1041,15 +1055,16 @@ static TSQueryError ts_query__parse_pattern( length ); + uint32_t step_index = starting_step_index; for (;;) { query_step__add_capture(step, capture_id); if ( step->alternative_index != NONE && - step->alternative_index > starting_step_index && + step->alternative_index > step_index && step->alternative_index < self->steps.size ) { - starting_step_index = step->alternative_index; - step = &self->steps.contents[starting_step_index]; + step_index = step->alternative_index; + step = &self->steps.contents[step_index]; } else { break; } From de53b82e2c48d6221af9881e50cc57d961de3365 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Jun 2020 14:22:37 -0700 Subject: [PATCH 082/282] Remove unnecessary caching on Travis macOS Travis builds have been extremely slow due to the final caching step. 
--- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f19cdbfe28..ab9a686666 100644 --- a/.travis.yml +++ b/.travis.yml @@ -71,5 +71,3 @@ cache: cargo: true directories: - target/emsdk - - test/fixtures/grammars - - $HOME/.emscripten_cache From 6a46dff89a9d9bd9ceb13d7838c1a801974ac08d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 15 Jun 2020 09:58:07 -0700 Subject: [PATCH 083/282] Add ts_language_alias_at helper function --- lib/src/get_changed_ranges.c | 24 +++++++------- lib/src/language.h | 51 +++++++++++++++++++---------- lib/src/subtree.c | 8 ++--- lib/src/tree_cursor.c | 62 +++++++++++++++++++----------------- 4 files changed, 84 insertions(+), 61 deletions(-) diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 5bd1d814bd..b24f314949 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -146,17 +146,21 @@ static bool iterator_tree_is_visible(const Iterator *self) { if (ts_subtree_visible(*entry.subtree)) return true; if (self->cursor.stack.size > 1) { Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - const TSSymbol *alias_sequence = ts_language_alias_sequence( + return ts_language_alias_at( self->language, - parent.ptr->production_id - ); - return alias_sequence && alias_sequence[entry.structural_child_index] != 0; + parent.ptr->production_id, + entry.structural_child_index + ) != 0; } return false; } -static void iterator_get_visible_state(const Iterator *self, Subtree *tree, - TSSymbol *alias_symbol, uint32_t *start_byte) { +static void iterator_get_visible_state( + const Iterator *self, + Subtree *tree, + TSSymbol *alias_symbol, + uint32_t *start_byte +) { uint32_t i = self->cursor.stack.size - 1; if (self->in_padding) { @@ -169,13 +173,11 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, if (i > 0) { const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - const TSSymbol 
*alias_sequence = ts_language_alias_sequence( + *alias_symbol = ts_language_alias_at( self->language, - parent->ptr->production_id + parent->ptr->production_id, + entry.structural_child_index ); - if (alias_sequence) { - *alias_symbol = alias_sequence[entry.structural_child_index]; - } } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { diff --git a/lib/src/language.h b/lib/src/language.h index 341f0f85af..2bb9a6f9db 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -41,17 +41,21 @@ static inline const TSParseAction *ts_language_actions( return entry.actions; } -static inline bool ts_language_has_actions(const TSLanguage *self, - TSStateId state, - TSSymbol symbol) { +static inline bool ts_language_has_actions( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { TableEntry entry; ts_language_table_entry(self, state, symbol, &entry); return entry.action_count > 0; } -static inline bool ts_language_has_reduce_action(const TSLanguage *self, - TSStateId state, - TSSymbol symbol) { +static inline bool ts_language_has_reduce_action( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { TableEntry entry; ts_language_table_entry(self, state, symbol, &entry); return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; @@ -82,9 +86,11 @@ static inline uint16_t ts_language_lookup( } } -static inline TSStateId ts_language_next_state(const TSLanguage *self, - TSStateId state, - TSSymbol symbol) { +static inline TSStateId ts_language_next_state( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { return 0; } else if (symbol < self->token_count) { @@ -102,9 +108,10 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self, } } -static inline const bool * -ts_language_enabled_external_tokens(const TSLanguage *self, - unsigned external_scanner_state) { +static inline const bool 
*ts_language_enabled_external_tokens( + const TSLanguage *self, + unsigned external_scanner_state +) { if (external_scanner_state == 0) { return NULL; } else { @@ -112,13 +119,25 @@ ts_language_enabled_external_tokens(const TSLanguage *self, } } -static inline const TSSymbol * -ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) { - return production_id > 0 ? - self->alias_sequences + production_id * self->max_alias_sequence_length : +static inline const TSSymbol *ts_language_alias_sequence( + const TSLanguage *self, + uint32_t production_id +) { + return production_id ? + &self->alias_sequences[production_id * self->max_alias_sequence_length] : NULL; } +static inline TSSymbol ts_language_alias_at( + const TSLanguage *self, + uint32_t production_id, + uint32_t child_index +) { + return production_id ? + self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : + 0; +} + static inline void ts_language_field_map( const TSLanguage *self, uint32_t production_id, diff --git a/lib/src/subtree.c b/lib/src/subtree.c index ef92a32fe4..24dc06b203 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -360,7 +360,7 @@ void ts_subtree_set_children( self.ptr->has_external_tokens = false; self.ptr->dynamic_precedence = 0; - uint32_t non_extra_index = 0; + uint32_t structural_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; @@ -387,9 +387,9 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); self.ptr->node_count += ts_subtree_node_count(child); - if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) { + if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { self.ptr->visible_child_count++; - if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) { + if (ts_language_symbol_metadata(language, 
alias_sequence[structural_index]).named) { self.ptr->named_child_count++; } } else if (ts_subtree_visible(child)) { @@ -407,7 +407,7 @@ void ts_subtree_set_children( self.ptr->parse_state = TS_TREE_STATE_NONE; } - if (!ts_subtree_extra(child)) non_extra_index++; + if (!ts_subtree_extra(child)) structural_index++; } self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 00b9679d73..06c724d282 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -205,19 +205,21 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { TreeCursorEntry *entry = &self->stack.contents[i]; - bool is_aliased = false; - if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; - } - if (ts_subtree_visible(*entry->subtree) || is_aliased) { + if (ts_subtree_visible(*entry->subtree)) { self->stack.size = i + 1; return true; } + if (i > 0 && !ts_subtree_extra(*entry->subtree)) { + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + if (ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + )) { + self->stack.size = i + 1; + return true; + } + } } return false; } @@ -226,15 +228,13 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); TSSymbol alias_symbol = 0; - if (self->stack.size > 1) { + if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - const 
TSSymbol *alias_sequence = ts_language_alias_sequence( + alias_symbol = ts_language_alias_at( self->tree->language, - parent_entry->subtree->ptr->production_id + parent_entry->subtree->ptr->production_id, + last_entry->structural_child_index ); - if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { - alias_symbol = alias_sequence[last_entry->structural_child_index]; - } } return ts_node_new( self->tree, @@ -263,13 +263,14 @@ TSFieldId ts_tree_cursor_current_status( // Stop walking up when a visible ancestor is found. if (i != self->stack.size - 1) { if (ts_subtree_visible(*entry->subtree)) break; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - if (alias_sequence && alias_sequence[entry->structural_child_index]) { - break; - } + if ( + !ts_subtree_extra(*entry->subtree) && + ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ) + ) break; } if (ts_subtree_child_count(*parent_entry->subtree) > entry->child_index + 1) { @@ -321,13 +322,14 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { // Stop walking up when another visible node is found. 
if (i != self->stack.size - 1) { if (ts_subtree_visible(*entry->subtree)) break; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - if (alias_sequence && alias_sequence[entry->structural_child_index]) { - break; - } + if ( + !ts_subtree_extra(*entry->subtree) && + ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ) + ) break; } if (ts_subtree_extra(*entry->subtree)) break; From 4c2f36a07b99732c96d474fdae30c1cf158b966e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 8 Jun 2020 16:07:22 -0700 Subject: [PATCH 084/282] Mark steps as definite on query construction * Add a ts_query_pattern_is_definite API, just for debugging this * Store state_count on TSLanguage structs, to allow for scanning parse tables --- cli/src/generate/render.rs | 62 +-- cli/src/main.rs | 10 +- cli/src/tests/query_test.rs | 72 +++- lib/binding_rust/bindings.rs | 13 +- lib/binding_rust/lib.rs | 8 + lib/include/tree_sitter/api.h | 8 +- lib/include/tree_sitter/parser.h | 1 + lib/src/array.h | 24 ++ lib/src/language.h | 1 + lib/src/query.c | 632 +++++++++++++++++++++++++++++-- 10 files changed, 755 insertions(+), 76 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 270bd00d4d..300ad38333 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -95,11 +95,7 @@ impl Generator { self.add_stats(); self.add_symbol_enum(); self.add_symbol_names_list(); - - if self.next_abi { - self.add_unique_symbol_map(); - } - + self.add_unique_symbol_map(); self.add_symbol_metadata_list(); if !self.field_names.is_empty() { @@ -177,20 +173,16 @@ impl Generator { // If we are opting in to the new unstable language ABI, then use the concept of // "small parse states". Otherwise, use the same representation for all parse // states. 
- if self.next_abi { - let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); - self.large_state_count = self - .parse_table - .states - .iter() - .enumerate() - .take_while(|(i, s)| { - *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold - }) - .count(); - } else { - self.large_state_count = self.parse_table.states.len(); - } + let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); + self.large_state_count = self + .parse_table + .states + .iter() + .enumerate() + .take_while(|(i, s)| { + *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold + }) + .count(); } fn add_includes(&mut self) { @@ -256,10 +248,7 @@ impl Generator { "#define STATE_COUNT {}", self.parse_table.states.len() ); - - if self.next_abi { - add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); - } + add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); add_line!( self, @@ -689,17 +678,12 @@ impl Generator { name ); indent!(self); - add_line!(self, "START_LEXER();"); - - if self.next_abi { - add_line!(self, "eof = lexer->eof(lexer);"); - } else { - add_line!(self, "eof = lookahead == 0;"); - } + add_line!(self, "START_LEXER();"); + add_line!(self, "eof = lexer->eof(lexer);"); add_line!(self, "switch (state) {{"); - indent!(self); + indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); @@ -714,6 +698,7 @@ impl Generator { dedent!(self); add_line!(self, "}}"); + dedent!(self); add_line!(self, "}}"); add_line!(self, ""); @@ -967,12 +952,7 @@ impl Generator { add_line!( self, - "static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{", - if self.next_abi { - "LARGE_STATE_COUNT" - } else { - "STATE_COUNT" - } + "static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{", ); indent!(self); @@ -1224,9 +1204,10 @@ impl Generator { add_line!(self, ".symbol_count = SYMBOL_COUNT,"); 
add_line!(self, ".alias_count = ALIAS_COUNT,"); add_line!(self, ".token_count = TOKEN_COUNT,"); + add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); if self.next_abi { - add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); + add_line!(self, ".state_count = STATE_COUNT,"); } add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); @@ -1249,10 +1230,7 @@ impl Generator { add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - - if self.next_abi { - add_line!(self, ".public_symbol_map = ts_symbol_map,"); - } + add_line!(self, ".public_symbol_map = ts_symbol_map,"); if !self.parse_table.production_infos.is_empty() { add_line!( diff --git a/cli/src/main.rs b/cli/src/main.rs index 757c70eb6d..04cd34cd7a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -149,8 +149,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser") - .arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index d4f18c7ddb..92aff5fb0c 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1919,7 +1919,7 @@ fn test_query_start_byte_for_pattern() { let patterns_3 = " ((identifier) @b (#match? 
@b i)) (function_declaration name: (identifier) @c) - (method_definition name: (identifier) @d) + (method_definition name: (property_identifier) @d) " .trim_start(); @@ -2048,6 +2048,76 @@ fn test_query_disable_pattern() { }); } +#[test] +fn test_query_is_definite() { + struct Row { + pattern: &'static str, + results_by_step_index: &'static [(usize, bool)], + } + + let rows = &[ + Row { + pattern: r#"(object "{" "}")"#, + results_by_step_index: &[ + (0, false), + (1, true), // "{" + (2, true), // "}" + ], + }, + Row { + pattern: r#"(pair (property_identifier) ":")"#, + results_by_step_index: &[ + (0, false), + (1, false), // property_identifier + (2, true), // ":"" + ], + }, + Row { + pattern: r#"(object "{" (_) "}")"#, + results_by_step_index: &[ + (0, false), + (1, false), // "{"" + (2, false), // (_) + (3, true), // "}" + ], + }, + Row { + // Named wildcards, fields + pattern: r#"(binary_expression left: (identifier) right: (_))"#, + results_by_step_index: &[ + (0, false), + (1, false), // identifier + (2, true), // (_) + ], + }, + Row { + pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + results_by_step_index: &[ + (0, false), + (1, true), // identifier + (2, true), // statement_block + ], + }, + ]; + + allocations::record(|| { + let language = get_language("javascript"); + for row in rows.iter() { + let query = Query::new(language, row.pattern).unwrap(); + for (step_index, is_definite) in row.results_by_step_index { + assert_eq!( + query.pattern_is_definite(0, *step_index), + *is_definite, + "Pattern: {:?}, step: {}, expected is_definite to be {}", + row.pattern, + step_index, + is_definite, + ) + } + } + }); +} + fn assert_query_matches( language: Language, query: &Query, diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index cba87fa312..7dc4866079 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -172,9 +172,9 @@ extern "C" { #[doc = " the given ranges must be ordered from 
earliest to latest in the document,"] #[doc = " and they must not overlap. That is, the following must hold for all"] #[doc = " `i` < `length - 1`:"] - #[doc = " ```text"] + #[doc = ""] #[doc = " ranges[i].end_byte <= ranges[i + 1].start_byte"] - #[doc = " ```"] + #[doc = ""] #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] #[doc = " will not be assigned, and this function will return `false`. On success,"] #[doc = " this function returns `true`"] @@ -649,6 +649,13 @@ extern "C" { length: *mut u32, ) -> *const TSQueryPredicateStep; } +extern "C" { + pub fn ts_query_pattern_is_definite( + self_: *const TSQuery, + pattern_index: u32, + step_index: u32, + ) -> bool; +} extern "C" { #[doc = " Get the name and length of one of the query\'s captures, or one of the"] #[doc = " query\'s string literals. Each capture and string is associated with a"] @@ -800,5 +807,5 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 11; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9; diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c0aba32f01..453cb8e7ef 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1449,6 +1449,14 @@ impl Query { unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } } + /// Check if a pattern will definitely match after a certain number of steps + /// have matched. 
+ pub fn pattern_is_definite(&self, index: usize, step_index: usize) -> bool { + unsafe { + ffi::ts_query_pattern_is_definite(self.ptr.as_ptr(), index as u32, step_index as u32) + } + } + fn parse_property( function_name: &str, capture_names: &[String], diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 9d832e6ec4..1b2533fca4 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -21,7 +21,7 @@ extern "C" { * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 11 +#define TREE_SITTER_LANGUAGE_VERSION 12 /** * The earliest ABI version that is supported by the current version of the @@ -718,6 +718,12 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( uint32_t *length ); +bool ts_query_pattern_is_definite( + const TSQuery *self, + uint32_t pattern_index, + uint32_t step_index +); + /** * Get the name and length of one of the query's captures, or one of the * query's string literals. 
Each capture and string is associated with a diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 11bf4fc42a..360e012f44 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -119,6 +119,7 @@ struct TSLanguage { const uint16_t *small_parse_table; const uint32_t *small_parse_table_map; const TSSymbol *public_symbol_map; + uint32_t state_count; }; /* diff --git a/lib/src/array.h b/lib/src/array.h index 26cb8448f1..c7e0ae4ab8 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -66,6 +66,30 @@ extern "C" { #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) +#define array_search_sorted_by(self, start, field, needle, out_index, out_exists) \ + do { \ + *(out_exists) = false; \ + for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ + int _comparison = (int)((self)->contents[*(out_index)] field) - (int)(needle); \ + if (_comparison >= 0) { \ + if (_comparison == 0) *(out_exists) = true; \ + break; \ + } \ + } \ + } while (0); + +#define array_search_sorted_with(self, start, compare, needle, out_index, out_exists) \ + do { \ + *(out_exists) = false; \ + for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ + int _comparison = compare(&(self)->contents[*(out_index)], (needle)); \ + if (_comparison >= 0) { \ + if (_comparison == 0) *(out_exists) = true; \ + break; \ + } \ + } \ + } while (0); + // Private typedef Array(void) VoidArray; diff --git a/lib/src/language.h b/lib/src/language.h index 2bb9a6f9db..288c2a2b09 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -12,6 +12,7 @@ extern "C" { #define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10 #define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11 #define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11 +#define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12 typedef struct { const TSParseAction *actions; diff --git 
a/lib/src/query.c b/lib/src/query.c index ff243494a2..10ab53719e 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -14,6 +14,8 @@ #define MAX_STATE_COUNT 256 #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 +#define MAX_STATE_PREDECESSOR_COUNT 100 +#define MAX_WALK_STATE_DEPTH 4 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -55,6 +57,7 @@ typedef struct { bool is_pass_through: 1; bool is_dead_end: 1; bool alternative_is_immediate: 1; + bool is_definite: 1; } QueryStep; /* @@ -89,6 +92,12 @@ typedef struct { uint16_t pattern_index; } PatternEntry; +typedef struct { + Slice predicate_steps; + uint32_t start_byte; + uint32_t start_step; +} QueryPattern; + /* * QueryState - The state of an in-progress match of a particular pattern * in a query. While executing, a `TSQueryCursor` must keep track of a number @@ -138,6 +147,31 @@ typedef struct { uint32_t usage_map; } CaptureListPool; +/* + * WalkState - The state needed for walking the parse table when analyzing + * a query pattern, to determine the steps where the pattern could fail + * to match. + */ +typedef struct { + TSStateId state; + TSSymbol parent_symbol; + uint16_t child_index; + TSFieldId field; +} WalkStateEntry; + +typedef struct { + WalkStateEntry stack[MAX_WALK_STATE_DEPTH]; + uint16_t depth; + uint16_t step_index; +} WalkState; + +/* + * StatePredecessorMap - A map that stores the predecessors of each parse state. + */ +typedef struct { + TSStateId *contents; +} StatePredecessorMap; + /* * TSQuery - A tree query, compiled from a string of S-expressions. The query * itself is immutable. 
The mutable state used in the process of executing the @@ -149,8 +183,7 @@ struct TSQuery { Array(QueryStep) steps; Array(PatternEntry) pattern_map; Array(TSQueryPredicateStep) predicate_steps; - Array(Slice) predicates_by_pattern; - Array(uint32_t) start_bytes_by_pattern; + Array(QueryPattern) patterns; const TSLanguage *language; uint16_t wildcard_root_pattern_count; TSSymbol *symbol_map; @@ -451,6 +484,7 @@ static QueryStep query_step__new( .is_pattern_start = false, .is_pass_through = false, .is_dead_end = false, + .is_definite = false, .is_immediate = is_immediate, .alternative_is_immediate = false, }; @@ -480,6 +514,67 @@ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { } } +/********************** + * StatePredecessorMap + **********************/ + +static inline StatePredecessorMap state_predecessor_map_new(const TSLanguage *language) { + return (StatePredecessorMap) { + .contents = ts_calloc(language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId)), + }; +} + +static inline void state_predecessor_map_delete(StatePredecessorMap *self) { + ts_free(self->contents); +} + +static inline void state_predecessor_map_add( + StatePredecessorMap *self, + TSStateId state, + TSStateId predecessor +) { + unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + TSStateId *count = &self->contents[index]; + if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)) { + (*count)++; + self->contents[index + *count] = predecessor; + } +} + +static inline const TSStateId *state_predecessor_map_get( + const StatePredecessorMap *self, + TSStateId state, + unsigned *count +) { + unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + *count = self->contents[index]; + return &self->contents[index + 1]; +} + +/************ + * WalkState + ************/ + +static inline int walk_state__compare(WalkState *self, WalkState *other) { + if (self->depth < other->depth) return -1; 
+ if (self->depth > other->depth) return 1; + if (self->step_index < other->step_index) return -1; + if (self->step_index > other->step_index) return 1; + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].state < other->stack[i].state) return -1; + if (self->stack[i].state > other->stack[i].state) return 1; + if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1; + if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1; + if (self->stack[i].child_index < other->stack[i].child_index) return -1; + if (self->stack[i].child_index > other->stack[i].child_index) return 1; + } + return 0; +} + +static inline WalkStateEntry *walk_state__top(WalkState *self) { + return &self->stack[self->depth - 1]; +} + /********* * Query *********/ @@ -552,6 +647,466 @@ static inline void ts_query__pattern_map_insert( })); } +static void ts_query__analyze_patterns(TSQuery *self) { + typedef struct { + TSSymbol parent_symbol; + uint32_t parent_step_index; + Array(uint32_t) child_step_indices; + } ParentPattern; + + typedef struct { + TSStateId state; + uint8_t child_index; + uint8_t production_id; + bool done; + } SubgraphNode; + + typedef struct { + TSSymbol symbol; + Array(TSStateId) start_states; + Array(SubgraphNode) nodes; + } SymbolSubgraph; + + typedef Array(WalkState) WalkStateList; + + // Identify all of the patterns in the query that have child patterns. This + // includes both top-level patterns and patterns that are nested within some + // larger pattern. For each of these, record the parent symbol, the step index + // and all of the immediate child step indices in reverse order. 
+ Array(ParentPattern) parent_patterns = array_new(); + Array(uint32_t) stack = array_new(); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + array_clear(&stack); + } else { + uint32_t parent_pattern_index = 0; + while (stack.size > 0) { + parent_pattern_index = *array_back(&stack); + ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index]; + QueryStep *parent_step = &self->steps.contents[parent_pattern->parent_step_index]; + if (parent_step->depth >= step->depth) { + stack.size--; + } else { + break; + } + } + + if (stack.size > 0) { + ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index]; + step->is_definite = true; + array_push(&parent_pattern->child_step_indices, i); + } + + array_push(&stack, parent_patterns.size); + array_push(&parent_patterns, ((ParentPattern) { + .parent_symbol = step->symbol, + .parent_step_index = i, + })); + } + } + for (unsigned i = 0; i < parent_patterns.size; i++) { + ParentPattern *parent_pattern = &parent_patterns.contents[i]; + if (parent_pattern->child_step_indices.size == 0) { + array_erase(&parent_patterns, i); + i--; + } + } + + // Debug + // { + // printf("\nParent pattern entries\n"); + // for (unsigned i = 0; i < parent_patterns.size; i++) { + // ParentPattern *parent_pattern = &parent_patterns.contents[i]; + // printf(" %s ->", ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); + // for (unsigned j = 0; j < parent_pattern->child_step_indices.size; j++) { + // QueryStep *step = &self->steps.contents[parent_pattern->child_step_indices.contents[j]]; + // printf(" %s", ts_language_symbol_name(self->language, step->symbol)); + // } + // printf("\n"); + // } + // } + + // Initialize a set of subgraphs, with one subgraph for each parent symbol, + // in the query, and one subgraph for each hidden symbol. 
+ unsigned subgraph_index = 0, exists; + Array(SymbolSubgraph) subgraphs = array_new(); + for (unsigned i = 0; i < parent_patterns.size; i++) { + TSSymbol parent_symbol = parent_patterns.contents[i].parent_symbol; + array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) { + array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = parent_symbol, })); + } + } + subgraph_index = 0; + for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { + if (!ts_language_symbol_metadata(self->language, sym).visible) { + array_search_sorted_by( + &subgraphs, subgraph_index, + .symbol, sym, + &subgraph_index, &exists + ); + if (!exists) { + array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = sym, })); + subgraph_index++; + } + } + } + + // Scan the parse table to find the data needed for these subgraphs. + // Collect three things during this scan: + // 1) All of the parse states where one of these symbols can start. + // 2) All of the parse states where one of these symbols can end, along + // with information about the node that would be created. + // 3) A list of predecessor states for each state. 
+ StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); + for (TSStateId state = 1; state < self->language->state_count; state++) { + unsigned subgraph_index = 0, exists; + for (TSSymbol sym = 0; sym < self->language->token_count; sym++) { + unsigned count; + const TSParseAction *actions = ts_language_actions(self->language, state, sym, &count); + for (unsigned i = 0; i < count; i++) { + const TSParseAction *action = &actions[i]; + if (action->type == TSParseActionTypeReduce) { + unsigned exists; + array_search_sorted_by( + &subgraphs, + subgraph_index, + .symbol, + action->params.reduce.symbol, + &subgraph_index, + &exists + ); + if (exists) { + SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { + array_push(&subgraph->nodes, ((SubgraphNode) { + .state = state, + .production_id = action->params.reduce.production_id, + .child_index = action->params.reduce.child_count, + .done = true, + })); + } + } + } else if ( + action->type == TSParseActionTypeShift && + !action->params.shift.extra + ) { + TSStateId next_state = action->params.shift.state; + state_predecessor_map_add(&predecessor_map, next_state, state); + } + } + } + for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { + TSStateId next_state = ts_language_next_state(self->language, state, sym); + if (next_state != 0) { + state_predecessor_map_add(&predecessor_map, next_state, state); + array_search_sorted_by( + &subgraphs, + subgraph_index, + .symbol, + sym, + &subgraph_index, + &exists + ); + if (exists) { + SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + array_push(&subgraph->start_states, state); + } + } + } + } + + // For each subgraph, compute the remainder of the nodes by walking backward + // from the end states using the predecessor map. 
+ Array(SubgraphNode) next_nodes = array_new(); + for (unsigned i = 0; i < subgraphs.size; i++) { + SymbolSubgraph *subgraph = &subgraphs.contents[i]; + if (subgraph->nodes.size == 0) { + array_delete(&subgraph->start_states); + array_erase(&subgraphs, i); + i--; + continue; + } + array_assign(&next_nodes, &subgraph->nodes); + while (next_nodes.size > 0) { + SubgraphNode node = array_pop(&next_nodes); + if (node.child_index > 1) { + unsigned predecessor_count; + const TSStateId *predecessors = state_predecessor_map_get( + &predecessor_map, + node.state, + &predecessor_count + ); + for (unsigned j = 0; j < predecessor_count; j++) { + SubgraphNode predecessor_node = { + .state = predecessors[j], + .child_index = node.child_index - 1, + .production_id = node.production_id, + .done = false, + }; + unsigned index, exists; + array_search_sorted_by(&subgraph->nodes, 0, .state, predecessor_node.state, &index, &exists); + if (!exists) { + array_insert(&subgraph->nodes, index, predecessor_node); + array_push(&next_nodes, predecessor_node); + } + } + } + } + } + + // Debug + // { + // printf("\nSubgraphs:\n"); + // for (unsigned i = 0; i < subgraphs.size; i++) { + // SymbolSubgraph *subgraph = &subgraphs.contents[i]; + // printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); + // for (unsigned j = 0; j < subgraph->nodes.size; j++) { + // SubgraphNode *node = &subgraph->nodes.contents[j]; + // printf(" {state: %u, child_index: %u}\n", node->state, node->child_index); + // } + // printf("\n"); + // } + // } + + // For each non-terminal pattern, determine if the pattern can successfully match, + // and all of the possible children within the pattern where matching could fail. 
+ WalkStateList walk_states = array_new(); + WalkStateList next_walk_states = array_new(); + Array(uint16_t) finished_step_indices = array_new(); + for (unsigned i = 0; i < parent_patterns.size; i++) { + ParentPattern *parent_pattern = &parent_patterns.contents[i]; + unsigned subgraph_index, exists; + array_search_sorted_by(&subgraphs, 0, .symbol, parent_pattern->parent_symbol, &subgraph_index, &exists); + if (!exists) { + // TODO - what to do for ERROR patterns + continue; + } + SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + + // Initialize a walk at every possible parse state where this non-terminal + // symbol can start. + array_clear(&walk_states); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + TSStateId state = subgraph->start_states.contents[j]; + array_push(&walk_states, ((WalkState) { + .step_index = 0, + .stack = { + [0] = { + .state = state, + .child_index = 0, + .parent_symbol = subgraph->symbol, + .field = 0, + }, + }, + .depth = 1, + })); + } + + // Walk the subgraph for this non-terminal, tracking all of the possible + // sequences of progress within the pattern. 
+ array_clear(&finished_step_indices); + while (walk_states.size > 0) { + // Debug + // { + // printf("Walk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); + // for (unsigned j = 0; j < walk_states.size; j++) { + // WalkState *walk_state = &walk_states.contents[j]; + // printf( + // " %u: {depth: %u, step: %u, state: %u, child_index: %u, parent: %s}\n", + // j, + // walk_state->depth, + // walk_state->step_index, + // walk_state->stack[walk_state->depth - 1].state, + // walk_state->stack[walk_state->depth - 1].child_index, + // ts_language_symbol_name(self->language, walk_state->stack[walk_state->depth - 1].parent_symbol) + // ); + // } + + // printf("\nFinished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); + // for (unsigned j = 0; j < finished_step_indices.size; j++) { + // printf(" %u", finished_step_indices.contents[j]); + // } + // printf("\n\n"); + // } + + array_clear(&next_walk_states); + for (unsigned j = 0; j < walk_states.size; j++) { + WalkState *walk_state = &walk_states.contents[j]; + TSStateId state = walk_state->stack[walk_state->depth - 1].state; + unsigned child_index = walk_state->stack[walk_state->depth - 1].child_index; + TSSymbol parent_symbol = walk_state->stack[walk_state->depth - 1].parent_symbol; + + unsigned subgraph_index, exists; + array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); + if (!exists) continue; + SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + + for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { + TSStateId successor_state = ts_language_next_state(self->language, state, sym); + if (successor_state && successor_state != state) { + unsigned node_index; + array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); + if (exists) { + SubgraphNode *node = &subgraph->nodes.contents[node_index]; + if (node->child_index != 
child_index + 1) continue; + + WalkState next_walk_state = *walk_state; + walk_state__top(&next_walk_state)->child_index++; + walk_state__top(&next_walk_state)->state = successor_state; + + bool does_match = true; + unsigned step_index = parent_pattern->child_step_indices.contents[walk_state->step_index]; + QueryStep *step = &self->steps.contents[step_index]; + TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + TSSymbol visible_symbol = alias + ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + if (visible_symbol) { + if (step->symbol == NAMED_WILDCARD_SYMBOL) { + if (!ts_language_symbol_metadata(self->language, visible_symbol).named) does_match = false; + } else if (step->symbol != WILDCARD_SYMBOL) { + if (step->symbol != visible_symbol) does_match = false; + } + } else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) { + does_match = false; + next_walk_state.depth++; + walk_state__top(&next_walk_state)->state = state; + walk_state__top(&next_walk_state)->child_index = 0; + walk_state__top(&next_walk_state)->parent_symbol = sym; + } else { + continue; + } + + TSFieldId field_id = 0; + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } + } + + if (does_match) { + next_walk_state.step_index++; + } + + if (node->done) { + next_walk_state.depth--; + } + + if ( + next_walk_state.depth == 0 || + next_walk_state.step_index == parent_pattern->child_step_indices.size + ) { + unsigned index, exists; + array_search_sorted_by(&finished_step_indices, 0, , next_walk_state.step_index, &index, &exists); + if (!exists) array_insert(&finished_step_indices, index, next_walk_state.step_index); + continue; + } + + unsigned index, exists; + 
array_search_sorted_with( + &next_walk_states, + 0, + walk_state__compare, + &next_walk_state, + &index, + &exists + ); + if (!exists) { + array_insert(&next_walk_states, index, next_walk_state); + } + } + } + } + } + + WalkStateList _walk_states = walk_states; + walk_states = next_walk_states; + next_walk_states = _walk_states; + } + + // Debug + // { + // printf("Finished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); + // for (unsigned j = 0; j < finished_step_indices.size; j++) { + // printf(" %u", finished_step_indices.contents[j]); + // } + // printf("\n\n"); + // } + + // A query step is definite if the containing pattern will definitely match + // once the step is reached. In other words, a step is *not* definite if + // it's possible to create a syntax node that matches up to until that step, + // but does not match the entire pattern. + for (unsigned j = 0, n = parent_pattern->child_step_indices.size; j < n; j++) { + uint32_t step_index = parent_pattern->child_step_indices.contents[j]; + for (unsigned k = 0; k < finished_step_indices.size; k++) { + uint32_t finished_step_index = finished_step_indices.contents[k]; + if (finished_step_index >= j && finished_step_index < n) { + QueryStep *step = &self->steps.contents[step_index]; + step->is_definite = false; + break; + } + } + } + } + + // In order for a parent step to be definite, all of its child steps must + // be definite. Propagate the definiteness up the pattern trees by walking + // the query's steps in reverse. 
+ for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) { + QueryStep *step = &self->steps.contents[i]; + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *child_step = &self->steps.contents[j]; + if (child_step->depth <= step->depth) break; + if (child_step->depth == step->depth + 1 && !child_step->is_definite) { + step->is_definite = false; + break; + } + } + } + + // Debug + // { + // printf("\nSteps:\n"); + // for (unsigned i = 0; i < self->steps.size; i++) { + // QueryStep *step = &self->steps.contents[i]; + // if (step->depth == PATTERN_DONE_MARKER) { + // printf("\n"); + // continue; + // } + // printf( + // " {symbol: %s, is_definite: %d}\n", + // (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) ? "ANY" : ts_language_symbol_name(self->language, step->symbol), + // step->is_definite + // ); + // } + // } + + // Cleanup + for (unsigned i = 0; i < parent_patterns.size; i++) { + array_delete(&parent_patterns.contents[i].child_step_indices); + } + for (unsigned i = 0; i < subgraphs.size; i++) { + array_delete(&subgraphs.contents[i].start_states); + array_delete(&subgraphs.contents[i].nodes); + } + array_delete(&stack); + array_delete(&subgraphs); + array_delete(&next_nodes); + array_delete(&walk_states); + array_delete(&parent_patterns); + array_delete(&next_walk_states); + array_delete(&finished_step_indices); + state_predecessor_map_delete(&predecessor_map); +} + static void ts_query__finalize_steps(TSQuery *self) { for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; @@ -588,7 +1143,7 @@ static TSQueryError ts_query__parse_predicate( predicate_name, length ); - array_back(&self->predicates_by_pattern)->length++; + array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -599,7 +1154,7 @@ static TSQueryError ts_query__parse_predicate( if (stream->next == ')') 
{ stream_advance(stream); stream_skip_whitespace(stream); - array_back(&self->predicates_by_pattern)->length++; + array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeDone, .value_id = 0, @@ -628,7 +1183,7 @@ static TSQueryError ts_query__parse_predicate( return TSQueryErrorCapture; } - array_back(&self->predicates_by_pattern)->length++; + array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeCapture, .value_id = capture_id, @@ -668,7 +1223,7 @@ static TSQueryError ts_query__parse_predicate( string_content, length ); - array_back(&self->predicates_by_pattern)->length++; + array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -688,7 +1243,7 @@ static TSQueryError ts_query__parse_predicate( symbol_start, length ); - array_back(&self->predicates_by_pattern)->length++; + array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -712,7 +1267,6 @@ static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, - uint32_t *capture_count, bool is_immediate ) { const uint32_t starting_step_index = self->steps.size; @@ -737,7 +1291,6 @@ static TSQueryError ts_query__parse_pattern( self, stream, depth, - capture_count, is_immediate ); @@ -790,7 +1343,6 @@ static TSQueryError ts_query__parse_pattern( self, stream, depth, - capture_count, child_is_immediate ); if (e == PARENT_DONE && stream->next == ')') { @@ -871,7 +1423,6 @@ static TSQueryError ts_query__parse_pattern( self, stream, depth + 1, - capture_count, child_is_immediate ); if (e == PARENT_DONE && stream->next == ')') { @@ -955,7 +1506,6 @@ static TSQueryError 
ts_query__parse_pattern( self, stream, depth, - capture_count, is_immediate ); if (e == PARENT_DONE) return TSQueryErrorSyntax; @@ -1069,8 +1619,6 @@ static TSQueryError ts_query__parse_pattern( break; } } - - (*capture_count)++; } // No more suffix modifiers @@ -1123,7 +1671,7 @@ TSQuery *ts_query_new( .captures = symbol_table_new(), .predicate_values = symbol_table_new(), .predicate_steps = array_new(), - .predicates_by_pattern = array_new(), + .patterns = array_new(), .symbol_map = symbol_map, .wildcard_root_pattern_count = 0, .language = language, @@ -1133,15 +1681,14 @@ TSQuery *ts_query_new( Stream stream = stream_new(source, source_len); stream_skip_whitespace(&stream); while (stream.input < stream.end) { - uint32_t pattern_index = self->predicates_by_pattern.size; + uint32_t pattern_index = self->patterns.size; uint32_t start_step_index = self->steps.size; - uint32_t capture_count = 0; - array_push(&self->start_bytes_by_pattern, stream.input - source); - array_push(&self->predicates_by_pattern, ((Slice) { - .offset = self->predicate_steps.size, - .length = 0, + array_push(&self->patterns, ((QueryPattern) { + .predicate_steps = (Slice) {.offset = self->predicate_steps.size, .length = 0}, + .start_byte = stream.input - source, + .start_step = self->steps.size, })); - *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false); + *error_type = ts_query__parse_pattern(self, &stream, 0, false); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); // If any pattern could not be parsed, then report the error information @@ -1183,6 +1730,10 @@ TSQuery *ts_query_new( } } + if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { + ts_query__analyze_patterns(self); + } + ts_query__finalize_steps(self); return self; } @@ -1192,8 +1743,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->steps); array_delete(&self->pattern_map); array_delete(&self->predicate_steps); - 
array_delete(&self->predicates_by_pattern); - array_delete(&self->start_bytes_by_pattern); + array_delete(&self->patterns); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); ts_free(self->symbol_map); @@ -1202,7 +1752,7 @@ void ts_query_delete(TSQuery *self) { } uint32_t ts_query_pattern_count(const TSQuery *self) { - return self->predicates_by_pattern.size; + return self->patterns.size; } uint32_t ts_query_capture_count(const TSQuery *self) { @@ -1234,7 +1784,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( uint32_t pattern_index, uint32_t *step_count ) { - Slice slice = self->predicates_by_pattern.contents[pattern_index]; + Slice slice = self->patterns.contents[pattern_index].predicate_steps; *step_count = slice.length; return &self->predicate_steps.contents[slice.offset]; } @@ -1243,7 +1793,35 @@ uint32_t ts_query_start_byte_for_pattern( const TSQuery *self, uint32_t pattern_index ) { - return self->start_bytes_by_pattern.contents[pattern_index]; + return self->patterns.contents[pattern_index].start_byte; +} + +bool ts_query_pattern_is_definite( + const TSQuery *self, + uint32_t pattern_index, + uint32_t step_count +) { + uint32_t step_index = self->patterns.contents[pattern_index].start_step; + for (;;) { + QueryStep *start_step = &self->steps.contents[step_index]; + if (step_index + step_count < self->steps.size) { + QueryStep *step = start_step; + for (unsigned i = 0; i < step_count; i++) { + if (step->depth == PATTERN_DONE_MARKER) { + step = NULL; + break; + } + step++; + } + if (step && !step->is_definite) return false; + } + if (start_step->alternative_index != NONE && start_step->alternative_index > step_index) { + step_index = start_step->alternative_index; + } else { + break; + } + } + return true; } void ts_query_disable_capture( From 7f955419a88caada8455f8f73230a7b32712b30c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 22 Jun 2020 16:20:49 -0700 Subject: [PATCH 085/282] Start work on 
recognizing impossible patterns --- cli/src/error.rs | 4 + cli/src/tests/query_test.rs | 46 ++++++++- lib/binding_rust/bindings.rs | 1 + lib/binding_rust/lib.rs | 54 ++++++---- lib/binding_web/binding.js | 3 + lib/include/tree_sitter/api.h | 1 + lib/src/query.c | 182 +++++++++++++++++++++++----------- 7 files changed, 207 insertions(+), 84 deletions(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 824bd92fab..4b493019ab 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -70,6 +70,10 @@ impl<'a> From for Error { "Query error on line {}. Invalid syntax:\n{}", row, l )), + QueryError::Pattern(row, l) => Error::new(format!( + "Query error on line {}. Impossible pattern:\n{}", + row, l + )), QueryError::Predicate(p) => Error::new(format!("Query error: {}", p)), } } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 92aff5fb0c..cc42a70d2c 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -12,7 +12,11 @@ fn test_query_errors_on_invalid_syntax() { let language = get_language("javascript"); assert!(Query::new(language, "(if_statement)").is_ok()); - assert!(Query::new(language, "(if_statement condition:(identifier))").is_ok()); + assert!(Query::new( + language, + "(if_statement condition:(parenthesized_expression (identifier)))" + ) + .is_ok()); // Mismatched parens assert_eq!( @@ -180,6 +184,28 @@ fn test_query_errors_on_invalid_conditions() { }); } +#[test] +fn test_query_errors_on_impossible_patterns() { + allocations::record(|| { + let language = get_language("javascript"); + + assert_eq!( + Query::new( + language, + "(binary_expression left:(identifier) left:(identifier))" + ), + Err(QueryError::Pattern( + 1, + [ + "(binary_expression left:(identifier) left:(identifier))", // + "^" + ] + .join("\n") + )) + ); + }); +} + #[test] fn test_query_matches_with_simple_pattern() { allocations::record(|| { @@ -1946,10 +1972,10 @@ fn test_query_capture_names() { language, r#" (if_statement - condition: 
(binary_expression + condition: (parenthesized_expression (binary_expression left: _ @left-operand operator: "||" - right: _ @right-operand) + right: _ @right-operand)) consequence: (statement_block) @body) (while_statement @@ -2051,12 +2077,14 @@ fn test_query_disable_pattern() { #[test] fn test_query_is_definite() { struct Row { + language: Language, pattern: &'static str, results_by_step_index: &'static [(usize, bool)], } let rows = &[ Row { + language: get_language("javascript"), pattern: r#"(object "{" "}")"#, results_by_step_index: &[ (0, false), @@ -2065,6 +2093,7 @@ fn test_query_is_definite() { ], }, Row { + language: get_language("javascript"), pattern: r#"(pair (property_identifier) ":")"#, results_by_step_index: &[ (0, false), @@ -2073,6 +2102,7 @@ fn test_query_is_definite() { ], }, Row { + language: get_language("javascript"), pattern: r#"(object "{" (_) "}")"#, results_by_step_index: &[ (0, false), @@ -2083,6 +2113,7 @@ fn test_query_is_definite() { }, Row { // Named wildcards, fields + language: get_language("javascript"), pattern: r#"(binary_expression left: (identifier) right: (_))"#, results_by_step_index: &[ (0, false), @@ -2091,6 +2122,7 @@ fn test_query_is_definite() { ], }, Row { + language: get_language("javascript"), pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, results_by_step_index: &[ (0, false), @@ -2098,12 +2130,16 @@ fn test_query_is_definite() { (2, true), // statement_block ], }, + Row { + language: get_language("javascript"), + pattern: r#""#, + results_by_step_index: &[], + }, ]; allocations::record(|| { - let language = get_language("javascript"); for row in rows.iter() { - let query = Query::new(language, row.pattern).unwrap(); + let query = Query::new(row.language, row.pattern).unwrap(); for (step_index, is_definite) in row.results_by_step_index { assert_eq!( query.pattern_is_definite(0, *step_index), diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 
7dc4866079..167edebf5f 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -132,6 +132,7 @@ pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; +pub const TSQueryError_TSQueryErrorPattern: TSQueryError = 5; pub type TSQueryError = u32; extern "C" { #[doc = " Create a new parser."] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 453cb8e7ef..d3284974a1 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -163,6 +163,7 @@ pub enum QueryError { Field(usize, String), Capture(usize, String), Predicate(String), + Pattern(usize, String), } #[derive(Debug)] @@ -1175,27 +1176,42 @@ impl Query { } }); - let message = if let Some(line) = line_containing_error { - line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" - } else { - "Unexpected EOF".to_string() - }; + return match error_type { + // Error types that report names + ffi::TSQueryError_TSQueryErrorNodeType + | ffi::TSQueryError_TSQueryErrorField + | ffi::TSQueryError_TSQueryErrorCapture => { + let suffix = source.split_at(offset).1; + let end_offset = suffix + .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') + .unwrap_or(source.len()); + let name = suffix.split_at(end_offset).0.to_string(); + match error_type { + ffi::TSQueryError_TSQueryErrorNodeType => { + Err(QueryError::NodeType(row, name)) + } + ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(row, name)), + ffi::TSQueryError_TSQueryErrorCapture => { + Err(QueryError::Capture(row, name)) + } + _ => unreachable!(), + } + } - // if line_containing_error - return if error_type != ffi::TSQueryError_TSQueryErrorSyntax { - let suffix = source.split_at(offset).1; - let end_offset = suffix - .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') - .unwrap_or(source.len()); - 
let name = suffix.split_at(end_offset).0.to_string(); - match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => Err(QueryError::NodeType(row, name)), - ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(row, name)), - ffi::TSQueryError_TSQueryErrorCapture => Err(QueryError::Capture(row, name)), - _ => Err(QueryError::Syntax(row, message)), + // Error types that report positions + _ => { + let message = if let Some(line) = line_containing_error { + line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" + } else { + "Unexpected EOF".to_string() + }; + match error_type { + ffi::TSQueryError_TSQueryErrorPattern => { + Err(QueryError::Pattern(row, message)) + } + _ => Err(QueryError::Syntax(row, message)), + } } - } else { - Err(QueryError::Syntax(row, message)) }; } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 567b7eb317..cd8bec756c 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -680,6 +680,9 @@ class Language { case 4: error = new RangeError(`Bad capture name @${word}`); break; + case 5: + error = new SyntaxError(`Impossible pattern at offset ${errorIndex}: '${suffix}'...`); + break; default: error = new SyntaxError(`Bad syntax at offset ${errorIndex}: '${suffix}'...`); break; diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1b2533fca4..1abbf28cef 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -130,6 +130,7 @@ typedef enum { TSQueryErrorNodeType, TSQueryErrorField, TSQueryErrorCapture, + TSQueryErrorPattern, } TSQueryError; /********************/ diff --git a/lib/src/query.c b/lib/src/query.c index 10ab53719e..0b7530da96 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -156,7 +156,8 @@ typedef struct { TSStateId state; TSSymbol parent_symbol; uint16_t child_index; - TSFieldId field; + TSFieldId field_id: 15; + bool done: 1; } WalkStateEntry; typedef struct { @@ -165,6 +166,19 @@ typedef struct { uint16_t 
step_index; } WalkState; +typedef struct { + TSStateId state; + uint8_t production_id; + uint8_t child_index: 7; + bool done: 1; +} SubgraphNode; + +typedef struct { + TSSymbol symbol; + Array(TSStateId) start_states; + Array(SubgraphNode) nodes; +} SymbolSubgraph; + /* * StatePredecessorMap - A map that stores the predecessors of each parse state. */ @@ -571,6 +585,16 @@ static inline int walk_state__compare(WalkState *self, WalkState *other) { return 0; } +static inline int subgraph_node__compare(SubgraphNode *self, SubgraphNode *other) { + if (self->state < other->state) return -1; + if (self->state > other->state) return 1; + if (self->child_index < other->child_index) return -1; + if (self->child_index > other->child_index) return 1; + if (self->production_id < other->production_id) return -1; + if (self->production_id > other->production_id) return 1; + return 0; +} + static inline WalkStateEntry *walk_state__top(WalkState *self) { return &self->stack[self->depth - 1]; } @@ -647,28 +671,17 @@ static inline void ts_query__pattern_map_insert( })); } -static void ts_query__analyze_patterns(TSQuery *self) { +static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index) { typedef struct { TSSymbol parent_symbol; uint32_t parent_step_index; Array(uint32_t) child_step_indices; } ParentPattern; - typedef struct { - TSStateId state; - uint8_t child_index; - uint8_t production_id; - bool done; - } SubgraphNode; - - typedef struct { - TSSymbol symbol; - Array(TSStateId) start_states; - Array(SubgraphNode) nodes; - } SymbolSubgraph; - typedef Array(WalkState) WalkStateList; + bool result = true; + // Identify all of the patterns in the query that have child patterns. This // includes both top-level patterns and patterns that are nested within some // larger pattern. 
For each of these, record the parent symbol, the step index @@ -846,7 +859,11 @@ static void ts_query__analyze_patterns(TSQuery *self) { .done = false, }; unsigned index, exists; - array_search_sorted_by(&subgraph->nodes, 0, .state, predecessor_node.state, &index, &exists); + array_search_sorted_with( + &subgraph->nodes, 0, + subgraph_node__compare, &predecessor_node, + &index, &exists + ); if (!exists) { array_insert(&subgraph->nodes, index, predecessor_node); array_push(&next_nodes, predecessor_node); @@ -897,7 +914,8 @@ static void ts_query__analyze_patterns(TSQuery *self) { .state = state, .child_index = 0, .parent_symbol = subgraph->symbol, - .field = 0, + .field_id = 0, + .done = false, }, }, .depth = 1, @@ -923,20 +941,14 @@ static void ts_query__analyze_patterns(TSQuery *self) { // ts_language_symbol_name(self->language, walk_state->stack[walk_state->depth - 1].parent_symbol) // ); // } - - // printf("\nFinished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); - // for (unsigned j = 0; j < finished_step_indices.size; j++) { - // printf(" %u", finished_step_indices.contents[j]); - // } - // printf("\n\n"); // } array_clear(&next_walk_states); for (unsigned j = 0; j < walk_states.size; j++) { WalkState *walk_state = &walk_states.contents[j]; - TSStateId state = walk_state->stack[walk_state->depth - 1].state; - unsigned child_index = walk_state->stack[walk_state->depth - 1].child_index; - TSSymbol parent_symbol = walk_state->stack[walk_state->depth - 1].parent_symbol; + TSStateId state = walk_state__top(walk_state)->state; + unsigned child_index = walk_state__top(walk_state)->child_index; + TSSymbol parent_symbol = walk_state__top(walk_state)->parent_symbol; unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); @@ -948,15 +960,14 @@ static void ts_query__analyze_patterns(TSQuery *self) { if (successor_state && successor_state != state) { 
unsigned node_index; array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); - if (exists) { - SubgraphNode *node = &subgraph->nodes.contents[node_index]; - if (node->child_index != child_index + 1) continue; + while (exists && node_index < subgraph->nodes.size) { + SubgraphNode *node = &subgraph->nodes.contents[node_index++]; + if (node->state != successor_state || node->child_index != child_index + 1) continue; WalkState next_walk_state = *walk_state; walk_state__top(&next_walk_state)->child_index++; walk_state__top(&next_walk_state)->state = successor_state; - bool does_match = true; unsigned step_index = parent_pattern->child_step_indices.contents[walk_state->step_index]; QueryStep *step = &self->steps.contents[step_index]; TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); @@ -965,44 +976,56 @@ static void ts_query__analyze_patterns(TSQuery *self) { : self->language->symbol_metadata[sym].visible ? self->language->public_symbol_map[sym] : 0; + + TSFieldId field_id = 0; + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } + } + + if (node->done) { + walk_state__top(&next_walk_state)->done = true; + } + + bool does_match = true; if (visible_symbol) { if (step->symbol == NAMED_WILDCARD_SYMBOL) { - if (!ts_language_symbol_metadata(self->language, visible_symbol).named) does_match = false; + if (!self->language->symbol_metadata[visible_symbol].named) does_match = false; } else if (step->symbol != WILDCARD_SYMBOL) { if (step->symbol != visible_symbol) does_match = false; } + + if (step->field) { + bool does_match_field = step->field == field_id; + if (!does_match_field) { + for (unsigned i = 0; i < walk_state->depth; i++) { + if (walk_state->stack[i].field_id == 
step->field) { + does_match_field = true; + } + } + } + does_match &= does_match_field; + } } else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) { does_match = false; next_walk_state.depth++; walk_state__top(&next_walk_state)->state = state; walk_state__top(&next_walk_state)->child_index = 0; walk_state__top(&next_walk_state)->parent_symbol = sym; + walk_state__top(&next_walk_state)->field_id = field_id; } else { continue; } - TSFieldId field_id = 0; - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } - } - if (does_match) { next_walk_state.step_index++; } - if (node->done) { - next_walk_state.depth--; - } - - if ( - next_walk_state.depth == 0 || - next_walk_state.step_index == parent_pattern->child_step_indices.size - ) { + if (next_walk_state.step_index == parent_pattern->child_step_indices.size) { unsigned index, exists; array_search_sorted_by(&finished_step_indices, 0, , next_walk_state.step_index, &index, &exists); if (!exists) array_insert(&finished_step_indices, index, next_walk_state.step_index); @@ -1011,19 +1034,39 @@ static void ts_query__analyze_patterns(TSQuery *self) { unsigned index, exists; array_search_sorted_with( - &next_walk_states, - 0, - walk_state__compare, - &next_walk_state, - &index, - &exists + &next_walk_states, 0, + walk_state__compare, &next_walk_state, + &index, &exists ); - if (!exists) { - array_insert(&next_walk_states, index, next_walk_state); - } + if (!exists) array_insert(&next_walk_states, index, next_walk_state); } } } + + bool did_pop = false; + while (walk_state->depth > 0 && walk_state__top(walk_state)->done) { + walk_state->depth--; + did_pop = true; + } + + if (did_pop) { + if (walk_state->depth == 0) { + unsigned index, exists; + array_search_sorted_by(&finished_step_indices, 0, , 
walk_state->step_index, &index, &exists); + if (!exists) array_insert(&finished_step_indices, index, walk_state->step_index); + } else { + unsigned index, exists; + array_search_sorted_with( + &next_walk_states, + 0, + walk_state__compare, + walk_state, + &index, + &exists + ); + if (!exists) array_insert(&next_walk_states, index, *walk_state); + } + } } WalkStateList _walk_states = walk_states; @@ -1037,7 +1080,7 @@ static void ts_query__analyze_patterns(TSQuery *self) { // for (unsigned j = 0; j < finished_step_indices.size; j++) { // printf(" %u", finished_step_indices.contents[j]); // } - // printf("\n\n"); + // printf(". Length: %u\n\n", parent_pattern->child_step_indices.size); // } // A query step is definite if the containing pattern will definitely match @@ -1055,6 +1098,16 @@ static void ts_query__analyze_patterns(TSQuery *self) { } } } + + if (finished_step_indices.size == 0 || *array_back(&finished_step_indices) < parent_pattern->child_step_indices.size) { + unsigned exists; + array_search_sorted_by( + &self->patterns, 0, + .start_step, + parent_pattern->parent_step_index, impossible_index, &exists); + result = false; + goto cleanup; + } } // In order for a parent step to be definite, all of its child steps must @@ -1090,6 +1143,7 @@ static void ts_query__analyze_patterns(TSQuery *self) { // } // Cleanup +cleanup: for (unsigned i = 0; i < parent_patterns.size; i++) { array_delete(&parent_patterns.contents[i].child_step_indices); } @@ -1105,6 +1159,8 @@ static void ts_query__analyze_patterns(TSQuery *self) { array_delete(&next_walk_states); array_delete(&finished_step_indices); state_predecessor_map_delete(&predecessor_map); + + return result; } static void ts_query__finalize_steps(TSQuery *self) { @@ -1731,7 +1787,13 @@ TSQuery *ts_query_new( } if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { - ts_query__analyze_patterns(self); + unsigned impossible_pattern_index = 0; + if (!ts_query__analyze_patterns(self, 
&impossible_pattern_index)) { + *error_type = TSQueryErrorPattern; + *error_offset = self->patterns.contents[impossible_pattern_index].start_byte; + ts_query_delete(self); + return NULL; + } } ts_query__finalize_steps(self); From e3cf5df039c599d3515b8d112fa1524df9734b5a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Jun 2020 13:09:38 -0700 Subject: [PATCH 086/282] Use actual step indices when walking subgraphs --- lib/src/array.h | 14 ++ lib/src/query.c | 384 ++++++++++++++++++++++-------------------------- 2 files changed, 193 insertions(+), 205 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index c7e0ae4ab8..e95867cf3a 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -90,6 +90,20 @@ extern "C" { } \ } while (0); +#define array_insert_sorted_by(self, start, field, value) \ + do { \ + unsigned index, exists; \ + array_search_sorted_by(self, start, field, (value) field, &index, &exists); \ + if (!exists) array_insert(self, index, value); \ + } while (0); + +#define array_insert_sorted_with(self, start, compare, value) \ + do { \ + unsigned index, exists; \ + array_search_sorted_with(self, start, compare, &(value), &index, &exists); \ + if (!exists) array_insert(self, index, value); \ + } while (0); + // Private typedef Array(void) VoidArray; diff --git a/lib/src/query.c b/lib/src/query.c index 0b7530da96..563ffe8d79 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -569,25 +569,37 @@ static inline const TSStateId *state_predecessor_map_get( * WalkState ************/ -static inline int walk_state__compare(WalkState *self, WalkState *other) { - if (self->depth < other->depth) return -1; - if (self->depth > other->depth) return 1; - if (self->step_index < other->step_index) return -1; - if (self->step_index > other->step_index) return 1; +static inline int walk_state__compare_position(const WalkState *self, const WalkState *other) { for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].state < other->stack[i].state) return 
-1; - if (self->stack[i].state > other->stack[i].state) return 1; - if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1; - if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1; + if (i >= other->depth) return -1; if (self->stack[i].child_index < other->stack[i].child_index) return -1; if (self->stack[i].child_index > other->stack[i].child_index) return 1; } + if (self->depth < other->depth) return 1; + return 0; +} + +static inline int walk_state__compare(const WalkState *self, const WalkState *other) { + int result = walk_state__compare_position(self, other); + if (result != 0) return result; + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1; + if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1; + if (self->stack[i].state < other->stack[i].state) return -1; + if (self->stack[i].state > other->stack[i].state) return 1; + if (self->stack[i].field_id < other->stack[i].field_id) return -1; + if (self->stack[i].field_id > other->stack[i].field_id) return 1; + } + if (self->step_index < other->step_index) return -1; + if (self->step_index > other->step_index) return 1; return 0; } -static inline int subgraph_node__compare(SubgraphNode *self, SubgraphNode *other) { +static inline int subgraph_node__compare(const SubgraphNode *self, const SubgraphNode *other) { if (self->state < other->state) return -1; if (self->state > other->state) return 1; + if (self->done && !other->done) return -1; + if (!self->done && other->done) return 1; if (self->child_index < other->child_index) return -1; if (self->child_index > other->child_index) return 1; if (self->production_id < other->production_id) return -1; @@ -672,97 +684,52 @@ static inline void ts_query__pattern_map_insert( } static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index) { - typedef struct { - TSSymbol parent_symbol; - uint32_t parent_step_index; - 
Array(uint32_t) child_step_indices; - } ParentPattern; - - typedef Array(WalkState) WalkStateList; - - bool result = true; - // Identify all of the patterns in the query that have child patterns. This // includes both top-level patterns and patterns that are nested within some // larger pattern. For each of these, record the parent symbol, the step index // and all of the immediate child step indices in reverse order. - Array(ParentPattern) parent_patterns = array_new(); - Array(uint32_t) stack = array_new(); + Array(uint32_t) parent_step_indices = array_new(); for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - array_clear(&stack); - } else { - uint32_t parent_pattern_index = 0; - while (stack.size > 0) { - parent_pattern_index = *array_back(&stack); - ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index]; - QueryStep *parent_step = &self->steps.contents[parent_pattern->parent_step_index]; - if (parent_step->depth >= step->depth) { - stack.size--; - } else { - break; - } - } - - if (stack.size > 0) { - ParentPattern *parent_pattern = &parent_patterns.contents[parent_pattern_index]; - step->is_definite = true; - array_push(&parent_pattern->child_step_indices, i); + if (i + 1 < self->steps.size) { + QueryStep *next_step = &self->steps.contents[i + 1]; + if ( + step->symbol != WILDCARD_SYMBOL && + step->symbol != NAMED_WILDCARD_SYMBOL && + next_step->depth > step->depth && + next_step->depth != PATTERN_DONE_MARKER + ) { + array_push(&parent_step_indices, i); } - - array_push(&stack, parent_patterns.size); - array_push(&parent_patterns, ((ParentPattern) { - .parent_symbol = step->symbol, - .parent_step_index = i, - })); } - } - for (unsigned i = 0; i < parent_patterns.size; i++) { - ParentPattern *parent_pattern = &parent_patterns.contents[i]; - if (parent_pattern->child_step_indices.size == 0) { - array_erase(&parent_patterns, i); - i--; + if (step->depth 
> 0) { + step->is_definite = true; } } // Debug // { - // printf("\nParent pattern entries\n"); - // for (unsigned i = 0; i < parent_patterns.size; i++) { - // ParentPattern *parent_pattern = &parent_patterns.contents[i]; - // printf(" %s ->", ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); - // for (unsigned j = 0; j < parent_pattern->child_step_indices.size; j++) { - // QueryStep *step = &self->steps.contents[parent_pattern->child_step_indices.contents[j]]; - // printf(" %s", ts_language_symbol_name(self->language, step->symbol)); - // } - // printf("\n"); + // printf("\nParent steps\n"); + // for (unsigned i = 0; i < parent_step_indices.size; i++) { + // uint32_t parent_step_index = parent_step_indices.contents[i]; + // TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + // printf(" %s %u\n", ts_language_symbol_name(self->language, parent_symbol), parent_step_index); // } // } // Initialize a set of subgraphs, with one subgraph for each parent symbol, // in the query, and one subgraph for each hidden symbol. 
- unsigned subgraph_index = 0, exists; Array(SymbolSubgraph) subgraphs = array_new(); - for (unsigned i = 0; i < parent_patterns.size; i++) { - TSSymbol parent_symbol = parent_patterns.contents[i].parent_symbol; - array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) { - array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = parent_symbol, })); - } + for (unsigned i = 0; i < parent_step_indices.size; i++) { + uint32_t parent_step_index = parent_step_indices.contents[i]; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + SymbolSubgraph subgraph = { .symbol = parent_symbol }; + array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); } - subgraph_index = 0; - for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { + for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { if (!ts_language_symbol_metadata(self->language, sym).visible) { - array_search_sorted_by( - &subgraphs, subgraph_index, - .symbol, sym, - &subgraph_index, &exists - ); - if (!exists) { - array_insert(&subgraphs, subgraph_index, ((SymbolSubgraph) { .symbol = sym, })); - subgraph_index++; - } + SymbolSubgraph subgraph = { .symbol = sym }; + array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); } } @@ -889,13 +856,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // For each non-terminal pattern, determine if the pattern can successfully match, // and all of the possible children within the pattern where matching could fail. 
+ bool result = true; + typedef Array(WalkState) WalkStateList; WalkStateList walk_states = array_new(); WalkStateList next_walk_states = array_new(); - Array(uint16_t) finished_step_indices = array_new(); - for (unsigned i = 0; i < parent_patterns.size; i++) { - ParentPattern *parent_pattern = &parent_patterns.contents[i]; + Array(uint16_t) final_step_indices = array_new(); + for (unsigned i = 0; i < parent_step_indices.size; i++) { + bool can_finish_pattern = false; + uint16_t parent_step_index = parent_step_indices.contents[i]; + uint16_t parent_depth = self->steps.contents[parent_step_index].depth; + TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, 0, .symbol, parent_pattern->parent_symbol, &subgraph_index, &exists); + array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) { // TODO - what to do for ERROR patterns continue; @@ -908,7 +880,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index for (unsigned j = 0; j < subgraph->start_states.size; j++) { TSStateId state = subgraph->start_states.contents[j]; array_push(&walk_states, ((WalkState) { - .step_index = 0, + .step_index = parent_step_index + 1, .stack = { [0] = { .state = state, @@ -924,36 +896,57 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. 
- array_clear(&finished_step_indices); - while (walk_states.size > 0) { + array_clear(&final_step_indices); + for (;;) { // Debug // { - // printf("Walk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); + // printf("Final step indices:"); + // for (unsigned j = 0; j < final_step_indices.size; j++) { + // printf(" %u", final_step_indices.contents[j]); + // } + // printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); // for (unsigned j = 0; j < walk_states.size; j++) { // WalkState *walk_state = &walk_states.contents[j]; - // printf( - // " %u: {depth: %u, step: %u, state: %u, child_index: %u, parent: %s}\n", - // j, - // walk_state->depth, - // walk_state->step_index, - // walk_state->stack[walk_state->depth - 1].state, - // walk_state->stack[walk_state->depth - 1].child_index, - // ts_language_symbol_name(self->language, walk_state->stack[walk_state->depth - 1].parent_symbol) - // ); + // printf(" %3u: {step: %u, stack: [", j, walk_state->step_index); + // for (unsigned k = 0; k < walk_state->depth; k++) { + // printf( + // " {parent: %s, child_index: %u, field: %s, state: %3u, done:%d}", + // self->language->symbol_names[walk_state->stack[k].parent_symbol], + // walk_state->stack[k].child_index, + // self->language->field_names[walk_state->stack[k].field_id], + // walk_state->stack[k].state, + // walk_state->stack[k].done + // ); + // } + // printf(" ]}\n"); // } + // printf("\n"); // } + if (walk_states.size == 0) break; array_clear(&next_walk_states); - for (unsigned j = 0; j < walk_states.size; j++) { - WalkState *walk_state = &walk_states.contents[j]; - TSStateId state = walk_state__top(walk_state)->state; - unsigned child_index = walk_state__top(walk_state)->child_index; - TSSymbol parent_symbol = walk_state__top(walk_state)->parent_symbol; + + unsigned j = 0; + for (; j < walk_states.size; j++) { + WalkState * const walk_state = &walk_states.contents[j]; + if ( + 
next_walk_states.size > 0 && + walk_state__compare_position(walk_state, array_back(&next_walk_states)) >= 0 + ) { + array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, *walk_state); + continue; + } + + const TSStateId state = walk_state__top(walk_state)->state; + const TSSymbol parent_symbol = walk_state__top(walk_state)->parent_symbol; + const TSFieldId parent_field_id = walk_state__top(walk_state)->field_id; + const unsigned child_index = walk_state__top(walk_state)->child_index; + const QueryStep * const step = &self->steps.contents[walk_state->step_index]; unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) continue; - SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + const SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { TSStateId successor_state = ts_language_next_state(self->language, state, sym); @@ -962,111 +955,93 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); while (exists && node_index < subgraph->nodes.size) { SubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor_state || node->child_index != child_index + 1) continue; - - WalkState next_walk_state = *walk_state; - walk_state__top(&next_walk_state)->child_index++; - walk_state__top(&next_walk_state)->state = successor_state; - - unsigned step_index = parent_pattern->child_step_indices.contents[walk_state->step_index]; - QueryStep *step = &self->steps.contents[step_index]; + if (node->state != successor_state) break; + if (node->child_index != child_index + 1) continue; TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + TSSymbol visible_symbol = alias ? 
alias : self->language->symbol_metadata[sym].visible ? self->language->public_symbol_map[sym] : 0; - TSFieldId field_id = 0; - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (field_map->child_index == child_index) { - field_id = field_map->field_id; - break; + TSFieldId field_id = parent_field_id; + if (!field_id) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (field_map->child_index == child_index) { + field_id = field_map->field_id; + break; + } } } - if (node->done) { - walk_state__top(&next_walk_state)->done = true; - } + WalkState next_walk_state = *walk_state; + walk_state__top(&next_walk_state)->child_index++; + walk_state__top(&next_walk_state)->state = successor_state; + if (node->done) walk_state__top(&next_walk_state)->done = true; - bool does_match = true; + bool does_match = false; if (visible_symbol) { + does_match = true; if (step->symbol == NAMED_WILDCARD_SYMBOL) { if (!self->language->symbol_metadata[visible_symbol].named) does_match = false; } else if (step->symbol != WILDCARD_SYMBOL) { if (step->symbol != visible_symbol) does_match = false; } - - if (step->field) { - bool does_match_field = step->field == field_id; - if (!does_match_field) { - for (unsigned i = 0; i < walk_state->depth; i++) { - if (walk_state->stack[i].field_id == step->field) { - does_match_field = true; - } - } - } - does_match &= does_match_field; + if (step->field && step->field != field_id) { + does_match = false; } } else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) { - does_match = false; next_walk_state.depth++; walk_state__top(&next_walk_state)->state = state; walk_state__top(&next_walk_state)->child_index = 0; walk_state__top(&next_walk_state)->parent_symbol = sym; 
walk_state__top(&next_walk_state)->field_id = field_id; + walk_state__top(&next_walk_state)->done = false; } else { continue; } if (does_match) { - next_walk_state.step_index++; + for (;;) { + next_walk_state.step_index++; + const QueryStep *step = &self->steps.contents[next_walk_state.step_index]; + if ( + step->depth == PATTERN_DONE_MARKER || + step->depth == parent_depth + ) { + can_finish_pattern = true; + break; + } + if (step->depth == parent_depth + 1) { + break; + } + } } - if (next_walk_state.step_index == parent_pattern->child_step_indices.size) { - unsigned index, exists; - array_search_sorted_by(&finished_step_indices, 0, , next_walk_state.step_index, &index, &exists); - if (!exists) array_insert(&finished_step_indices, index, next_walk_state.step_index); - continue; + while (next_walk_state.depth > 0 && walk_state__top(&next_walk_state)->done) { + memset(walk_state__top(&next_walk_state), 0, sizeof(WalkStateEntry)); + next_walk_state.depth--; } - unsigned index, exists; - array_search_sorted_with( - &next_walk_states, 0, - walk_state__compare, &next_walk_state, - &index, &exists - ); - if (!exists) array_insert(&next_walk_states, index, next_walk_state); + if ( + next_walk_state.depth == 0 || + self->steps.contents[next_walk_state.step_index].depth != parent_depth + 1 + ) { + array_insert_sorted_by(&final_step_indices, 0, , next_walk_state.step_index); + } else { + array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, next_walk_state); + } } } } + } - bool did_pop = false; - while (walk_state->depth > 0 && walk_state__top(walk_state)->done) { - walk_state->depth--; - did_pop = true; - } - - if (did_pop) { - if (walk_state->depth == 0) { - unsigned index, exists; - array_search_sorted_by(&finished_step_indices, 0, , walk_state->step_index, &index, &exists); - if (!exists) array_insert(&finished_step_indices, index, walk_state->step_index); - } else { - unsigned index, exists; - array_search_sorted_with( - &next_walk_states, - 0, - 
walk_state__compare, - walk_state, - &index, - &exists - ); - if (!exists) array_insert(&next_walk_states, index, *walk_state); - } - } + for (; j < walk_states.size; j++) { + WalkState *walk_state = &walk_states.contents[j]; + array_push(&next_walk_states, *walk_state); } WalkStateList _walk_states = walk_states; @@ -1074,39 +1049,40 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index next_walk_states = _walk_states; } - // Debug - // { - // printf("Finished step indices for %u %s:", i, ts_language_symbol_name(self->language, parent_pattern->parent_symbol)); - // for (unsigned j = 0; j < finished_step_indices.size; j++) { - // printf(" %u", finished_step_indices.contents[j]); - // } - // printf(". Length: %u\n\n", parent_pattern->child_step_indices.size); - // } - // A query step is definite if the containing pattern will definitely match // once the step is reached. In other words, a step is *not* definite if // it's possible to create a syntax node that matches up to until that step, // but does not match the entire pattern. 
- for (unsigned j = 0, n = parent_pattern->child_step_indices.size; j < n; j++) { - uint32_t step_index = parent_pattern->child_step_indices.contents[j]; - for (unsigned k = 0; k < finished_step_indices.size; k++) { - uint32_t finished_step_index = finished_step_indices.contents[k]; - if (finished_step_index >= j && finished_step_index < n) { - QueryStep *step = &self->steps.contents[step_index]; - step->is_definite = false; + uint32_t child_step_index = parent_step_index + 1; + QueryStep *child_step = &self->steps.contents[child_step_index]; + while (child_step->depth == parent_depth + 1) { + for (unsigned k = 0; k < final_step_indices.size; k++) { + uint32_t final_step_index = final_step_indices.contents[k]; + if ( + final_step_index >= child_step_index && + self->steps.contents[final_step_index].depth != PATTERN_DONE_MARKER + ) { + child_step->is_definite = false; break; } } + do { + child_step_index++; + child_step++; + } while ( + child_step->depth != PATTERN_DONE_MARKER && + child_step->depth > parent_depth + 1 + ); } - if (finished_step_indices.size == 0 || *array_back(&finished_step_indices) < parent_pattern->child_step_indices.size) { + if (result && !can_finish_pattern) { unsigned exists; array_search_sorted_by( &self->patterns, 0, - .start_step, - parent_pattern->parent_step_index, impossible_index, &exists); + .start_step, parent_step_index, + impossible_index, &exists + ); result = false; - goto cleanup; } } @@ -1131,33 +1107,31 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // for (unsigned i = 0; i < self->steps.size; i++) { // QueryStep *step = &self->steps.contents[i]; // if (step->depth == PATTERN_DONE_MARKER) { - // printf("\n"); - // continue; + // printf(" %u: DONE\n", i); + // } else { + // printf( + // " %u: {symbol: %s, is_definite: %d}\n", + // i, + // (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) + // ? 
"ANY" + // : ts_language_symbol_name(self->language, step->symbol), + // step->is_definite + // ); // } - // printf( - // " {symbol: %s, is_definite: %d}\n", - // (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) ? "ANY" : ts_language_symbol_name(self->language, step->symbol), - // step->is_definite - // ); // } // } // Cleanup -cleanup: - for (unsigned i = 0; i < parent_patterns.size; i++) { - array_delete(&parent_patterns.contents[i].child_step_indices); - } for (unsigned i = 0; i < subgraphs.size; i++) { array_delete(&subgraphs.contents[i].start_states); array_delete(&subgraphs.contents[i].nodes); } - array_delete(&stack); array_delete(&subgraphs); array_delete(&next_nodes); array_delete(&walk_states); - array_delete(&parent_patterns); array_delete(&next_walk_states); - array_delete(&finished_step_indices); + array_delete(&final_step_indices); + array_delete(&parent_step_indices); state_predecessor_map_delete(&predecessor_map); return result; From 9fb39b89545c5fa53650dbac522fa3709065f7e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Jun 2020 13:49:07 -0700 Subject: [PATCH 087/282] Start work on handling alternatives when analyzing queries --- lib/src/query.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 563ffe8d79..bc277bf9b7 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1004,6 +1004,11 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index continue; } + while (next_walk_state.depth > 0 && walk_state__top(&next_walk_state)->done) { + memset(walk_state__top(&next_walk_state), 0, sizeof(WalkStateEntry)); + next_walk_state.depth--; + } + if (does_match) { for (;;) { next_walk_state.step_index++; @@ -1021,18 +1026,23 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - while (next_walk_state.depth > 0 && walk_state__top(&next_walk_state)->done) { - 
memset(walk_state__top(&next_walk_state), 0, sizeof(WalkStateEntry)); - next_walk_state.depth--; - } - if ( next_walk_state.depth == 0 || self->steps.contents[next_walk_state.step_index].depth != parent_depth + 1 ) { array_insert_sorted_by(&final_step_indices, 0, , next_walk_state.step_index); } else { - array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, next_walk_state); + for (;;) { + const QueryStep *step = &self->steps.contents[next_walk_state.step_index]; + if (!step->is_dead_end) { + array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, next_walk_state); + } + if (step->alternative_index != NONE && step->alternative_index > next_walk_state.step_index) { + next_walk_state.step_index = step->alternative_index; + } else { + break; + } + } } } } From 891de051e2a33afd1f0b677e72965618348980f3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Jun 2020 15:05:44 -0700 Subject: [PATCH 088/282] Fix population of subgraph nodes when analyzing queries --- cli/src/tests/query_test.rs | 8 ++++++++ lib/src/query.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index cc42a70d2c..4c2f65abb8 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2083,6 +2083,14 @@ fn test_query_is_definite() { } let rows = &[ + Row { + language: get_language("python"), + pattern: r#"(expression_statement (string))"#, + results_by_step_index: &[ + (0, false), + (1, false), // string + ], + }, Row { language: get_language("javascript"), pattern: r#"(object "{" "}")"#, diff --git a/lib/src/query.c b/lib/src/query.c index bc277bf9b7..fa0edba14b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -748,10 +748,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index for (unsigned i = 0; i < count; i++) { const TSParseAction *action = &actions[i]; if (action->type == TSParseActionTypeReduce) { - unsigned exists; 
array_search_sorted_by( &subgraphs, - subgraph_index, + 0, .symbol, action->params.reduce.symbol, &subgraph_index, @@ -759,13 +758,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index ); if (exists) { SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + SubgraphNode node = { + .state = state, + .production_id = action->params.reduce.production_id, + .child_index = action->params.reduce.child_count, + .done = true, + }; if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((SubgraphNode) { - .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, - .done = true, - })); + array_push(&subgraph->nodes, node); } } } else if ( From 19baa5fd5e34257c1b34bcd899812b1b30f2d455 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Jun 2020 17:56:43 -0700 Subject: [PATCH 089/282] Clean up and document query analysis code --- cli/src/tests/query_test.rs | 8 + lib/src/query.c | 431 +++++++++++++++++++----------------- 2 files changed, 232 insertions(+), 207 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 4c2f65abb8..c73931ce89 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2091,6 +2091,14 @@ fn test_query_is_definite() { (1, false), // string ], }, + Row { + language: get_language("javascript"), + pattern: r#"(expression_statement (string))"#, + results_by_step_index: &[ + (0, false), + (1, false), // string + ], + }, Row { language: get_language("javascript"), pattern: r#"(object "{" "}")"#, diff --git a/lib/src/query.c b/lib/src/query.c index fa0edba14b..cf84115f9e 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -15,7 +15,7 @@ #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_WALK_STATE_DEPTH 4 +#define MAX_ANALYSIS_STATE_DEPTH 4 /* * Stream - A sequence 
of unicode characters derived from a UTF8 string. @@ -148,36 +148,36 @@ typedef struct { } CaptureListPool; /* - * WalkState - The state needed for walking the parse table when analyzing + * AnalysisState - The state needed for walking the parse table when analyzing * a query pattern, to determine the steps where the pattern could fail * to match. */ typedef struct { - TSStateId state; + TSStateId parse_state; TSSymbol parent_symbol; uint16_t child_index; TSFieldId field_id: 15; bool done: 1; -} WalkStateEntry; +} AnalysisStateEntry; typedef struct { - WalkStateEntry stack[MAX_WALK_STATE_DEPTH]; + AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; uint16_t depth; uint16_t step_index; -} WalkState; +} AnalysisState; typedef struct { TSStateId state; uint8_t production_id; uint8_t child_index: 7; bool done: 1; -} SubgraphNode; +} AnalysisSubgraphNode; typedef struct { TSSymbol symbol; Array(TSStateId) start_states; - Array(SubgraphNode) nodes; -} SymbolSubgraph; + Array(AnalysisSubgraphNode) nodes; +} AnalysisSubgraph; /* * StatePredecessorMap - A map that stores the predecessors of each parse state. 
@@ -565,11 +565,14 @@ static inline const TSStateId *state_predecessor_map_get( return &self->contents[index + 1]; } -/************ - * WalkState - ************/ +/**************** + * AnalysisState + ****************/ -static inline int walk_state__compare_position(const WalkState *self, const WalkState *other) { +static inline int analysis_state__compare_position( + const AnalysisState *self, + const AnalysisState *other +) { for (unsigned i = 0; i < self->depth; i++) { if (i >= other->depth) return -1; if (self->stack[i].child_index < other->stack[i].child_index) return -1; @@ -579,14 +582,17 @@ static inline int walk_state__compare_position(const WalkState *self, const Walk return 0; } -static inline int walk_state__compare(const WalkState *self, const WalkState *other) { - int result = walk_state__compare_position(self, other); +static inline int analysis_state__compare( + const AnalysisState *self, + const AnalysisState *other +) { + int result = analysis_state__compare_position(self, other); if (result != 0) return result; for (unsigned i = 0; i < self->depth; i++) { if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1; if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1; - if (self->stack[i].state < other->stack[i].state) return -1; - if (self->stack[i].state > other->stack[i].state) return 1; + if (self->stack[i].parse_state < other->stack[i].parse_state) return -1; + if (self->stack[i].parse_state > other->stack[i].parse_state) return 1; if (self->stack[i].field_id < other->stack[i].field_id) return -1; if (self->stack[i].field_id > other->stack[i].field_id) return 1; } @@ -595,7 +601,15 @@ static inline int walk_state__compare(const WalkState *self, const WalkState *ot return 0; } -static inline int subgraph_node__compare(const SubgraphNode *self, const SubgraphNode *other) { +static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { + return &self->stack[self->depth - 1]; +} + 
+/*********************** + * AnalysisSubgraphNode + ***********************/ + +static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { if (self->state < other->state) return -1; if (self->state > other->state) return 1; if (self->done && !other->done) return -1; @@ -607,10 +621,6 @@ static inline int subgraph_node__compare(const SubgraphNode *self, const Subgrap return 0; } -static inline WalkStateEntry *walk_state__top(WalkState *self) { - return &self->stack[self->depth - 1]; -} - /********* * Query *********/ @@ -683,11 +693,12 @@ static inline void ts_query__pattern_map_insert( })); } +// #define DEBUG_ANALYZE_QUERY + static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index) { - // Identify all of the patterns in the query that have child patterns. This - // includes both top-level patterns and patterns that are nested within some - // larger pattern. For each of these, record the parent symbol, the step index - // and all of the immediate child step indices in reverse order. + // Identify all of the patterns in the query that have child patterns, both at the + // top level and nested within other larger patterns. Record the step index where + // each pattern starts. 
Array(uint32_t) parent_step_indices = array_new(); for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; @@ -707,33 +718,29 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - // Debug - // { - // printf("\nParent steps\n"); - // for (unsigned i = 0; i < parent_step_indices.size; i++) { - // uint32_t parent_step_index = parent_step_indices.contents[i]; - // TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; - // printf(" %s %u\n", ts_language_symbol_name(self->language, parent_symbol), parent_step_index); - // } - // } - - // Initialize a set of subgraphs, with one subgraph for each parent symbol, - // in the query, and one subgraph for each hidden symbol. - Array(SymbolSubgraph) subgraphs = array_new(); + // For every parent symbol in the query, initialize an 'analysis subgraph'. + // This subgraph lists all of the states in the parse table that are directly + // involved in building subtrees for this symbol. + // + // In addition to the parent symbols in the query, construct subgraphs for all + // of the hidden symbols in the grammar, because these might occur within + // one of the parent nodes, such that their children appear to belong to the + // parent. 
+ Array(AnalysisSubgraph) subgraphs = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint32_t parent_step_index = parent_step_indices.contents[i]; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; - SymbolSubgraph subgraph = { .symbol = parent_symbol }; + AnalysisSubgraph subgraph = { .symbol = parent_symbol }; array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); } for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { if (!ts_language_symbol_metadata(self->language, sym).visible) { - SymbolSubgraph subgraph = { .symbol = sym }; + AnalysisSubgraph subgraph = { .symbol = sym }; array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); } } - // Scan the parse table to find the data needed for these subgraphs. + // Scan the parse table to find the data needed to populate these subgraphs. // Collect three things during this scan: // 1) All of the parse states where one of these symbols can start. // 2) All of the parse states where one of these symbols can end, along @@ -757,21 +764,17 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index &exists ); if (exists) { - SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - SubgraphNode node = { - .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, - .done = true, - }; + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, node); + array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { + .state = state, + .production_id = action->params.reduce.production_id, + .child_index = action->params.reduce.child_count, + .done = true, + })); } } - } else if ( - action->type == TSParseActionTypeShift && - !action->params.shift.extra - ) { + } else if (action->type == TSParseActionTypeShift && 
!action->params.shift.extra) { TSStateId next_state = action->params.shift.state; state_predecessor_map_add(&predecessor_map, next_state, state); } @@ -790,18 +793,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index &exists ); if (exists) { - SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; array_push(&subgraph->start_states, state); } } } } - // For each subgraph, compute the remainder of the nodes by walking backward + // For each subgraph, compute the preceding states by walking backward // from the end states using the predecessor map. - Array(SubgraphNode) next_nodes = array_new(); + Array(AnalysisSubgraphNode) next_nodes = array_new(); for (unsigned i = 0; i < subgraphs.size; i++) { - SymbolSubgraph *subgraph = &subgraphs.contents[i]; + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; if (subgraph->nodes.size == 0) { array_delete(&subgraph->start_states); array_erase(&subgraphs, i); @@ -810,7 +813,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } array_assign(&next_nodes, &subgraph->nodes); while (next_nodes.size > 0) { - SubgraphNode node = array_pop(&next_nodes); + AnalysisSubgraphNode node = array_pop(&next_nodes); if (node.child_index > 1) { unsigned predecessor_count; const TSStateId *predecessors = state_predecessor_map_get( @@ -819,7 +822,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index &predecessor_count ); for (unsigned j = 0; j < predecessor_count; j++) { - SubgraphNode predecessor_node = { + AnalysisSubgraphNode predecessor_node = { .state = predecessors[j], .child_index = node.child_index - 1, .production_id = node.production_id, @@ -828,7 +831,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index unsigned index, exists; array_search_sorted_with( &subgraph->nodes, 0, - subgraph_node__compare, &predecessor_node, + 
analysis_subgraph_node__compare, &predecessor_node, &index, &exists ); if (!exists) { @@ -840,52 +843,48 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - // Debug - // { - // printf("\nSubgraphs:\n"); - // for (unsigned i = 0; i < subgraphs.size; i++) { - // SymbolSubgraph *subgraph = &subgraphs.contents[i]; - // printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); - // for (unsigned j = 0; j < subgraph->nodes.size; j++) { - // SubgraphNode *node = &subgraph->nodes.contents[j]; - // printf(" {state: %u, child_index: %u}\n", node->state, node->child_index); - // } - // printf("\n"); - // } - // } + #ifdef DEBUG_ANALYZE_QUERY + printf("\nSubgraphs:\n"); + for (unsigned i = 0; i < subgraphs.size; i++) { + AnalysisSubgraph *subgraph = &subgraphs.contents[i]; + printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); + for (unsigned j = 0; j < subgraph->nodes.size; j++) { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; + printf(" {state: %u, child_index: %u}\n", node->state, node->child_index); + } + printf("\n"); + } + #endif // For each non-terminal pattern, determine if the pattern can successfully match, - // and all of the possible children within the pattern where matching could fail. + // and identify all of the possible children within the pattern where matching could fail. bool result = true; - typedef Array(WalkState) WalkStateList; - WalkStateList walk_states = array_new(); - WalkStateList next_walk_states = array_new(); + typedef Array(AnalysisState) AnalysisStateList; + AnalysisStateList states = array_new(); + AnalysisStateList next_states = array_new(); Array(uint16_t) final_step_indices = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { - bool can_finish_pattern = false; + // Find the subgraph that corresponds to this pattern's root symbol. 
uint16_t parent_step_index = parent_step_indices.contents[i]; uint16_t parent_depth = self->steps.contents[parent_step_index].depth; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) { - // TODO - what to do for ERROR patterns - continue; - } - SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (!exists) continue; + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - // Initialize a walk at every possible parse state where this non-terminal - // symbol can start. - array_clear(&walk_states); + // Initialize an analysis state at every parse state in the table where + // this parent symbol can occur. + array_clear(&states); for (unsigned j = 0; j < subgraph->start_states.size; j++) { - TSStateId state = subgraph->start_states.contents[j]; - array_push(&walk_states, ((WalkState) { + TSStateId parse_state = subgraph->start_states.contents[j]; + array_push(&states, ((AnalysisState) { .step_index = parent_step_index + 1, .stack = { [0] = { - .state = state, + .parse_state = parse_state, + .parent_symbol = parent_symbol, .child_index = 0, - .parent_symbol = subgraph->symbol, .field_id = 0, .done = false, }, @@ -896,75 +895,87 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. 
+ bool can_finish_pattern = false; array_clear(&final_step_indices); for (;;) { - // Debug - // { - // printf("Final step indices:"); - // for (unsigned j = 0; j < final_step_indices.size; j++) { - // printf(" %u", final_step_indices.contents[j]); - // } - // printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); - // for (unsigned j = 0; j < walk_states.size; j++) { - // WalkState *walk_state = &walk_states.contents[j]; - // printf(" %3u: {step: %u, stack: [", j, walk_state->step_index); - // for (unsigned k = 0; k < walk_state->depth; k++) { - // printf( - // " {parent: %s, child_index: %u, field: %s, state: %3u, done:%d}", - // self->language->symbol_names[walk_state->stack[k].parent_symbol], - // walk_state->stack[k].child_index, - // self->language->field_names[walk_state->stack[k].field_id], - // walk_state->stack[k].state, - // walk_state->stack[k].done - // ); - // } - // printf(" ]}\n"); - // } - // printf("\n"); - // } - - if (walk_states.size == 0) break; - array_clear(&next_walk_states); - - unsigned j = 0; - for (; j < walk_states.size; j++) { - WalkState * const walk_state = &walk_states.contents[j]; - if ( - next_walk_states.size > 0 && - walk_state__compare_position(walk_state, array_back(&next_walk_states)) >= 0 - ) { - array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, *walk_state); - continue; + #ifdef DEBUG_ANALYZE_QUERY + printf("Final step indices:"); + for (unsigned j = 0; j < final_step_indices.size; j++) { + printf(" %u", final_step_indices.contents[j]); + } + printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); + for (unsigned j = 0; j < states.size; j++) { + AnalysisState *state = &states.contents[j]; + printf(" %3u: {step: %u, stack: [", j, state->step_index); + for (unsigned k = 0; k < state->depth; k++) { + printf( + " {parent: %s, child_index: %u, field: %s, state: %3u, done:%d}", + 
self->language->symbol_names[state->stack[k].parent_symbol], + state->stack[k].child_index, + self->language->field_names[state->stack[k].field_id], + state->stack[k].parse_state, + state->stack[k].done + ); + } + printf(" ]}\n"); + } + #endif + + if (states.size == 0) break; + array_clear(&next_states); + for (unsigned j = 0; j < states.size; j++) { + AnalysisState * const state = &states.contents[j]; + + // For efficiency, it's important to avoid processing the same analysis state more + // than once. To achieve this, keep the states in order of ascending position within + // their hypothetical syntax trees. In each iteration of this loop, start by advancing + // the states that have made the least progress. Avoid advancing states that have already + // made more progress. + if (next_states.size > 0) { + int comparison = analysis_state__compare_position(state, array_back(&next_states)); + if (comparison == 0) { + array_insert_sorted_with(&next_states, 0, analysis_state__compare, *state); + continue; + } else if (comparison > 0) { + while (j < states.size) { + array_push(&next_states, states.contents[j]); + j++; + } + break; + } } - const TSStateId state = walk_state__top(walk_state)->state; - const TSSymbol parent_symbol = walk_state__top(walk_state)->parent_symbol; - const TSFieldId parent_field_id = walk_state__top(walk_state)->field_id; - const unsigned child_index = walk_state__top(walk_state)->child_index; - const QueryStep * const step = &self->steps.contents[walk_state->step_index]; + const TSStateId parse_state = analysis_state__top(state)->parse_state; + const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; + const TSFieldId parent_field_id = analysis_state__top(state)->field_id; + const unsigned child_index = analysis_state__top(state)->child_index; + const QueryStep * const step = &self->steps.contents[state->step_index]; unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, 
&exists); if (!exists) continue; - const SymbolSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + const AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + // Follow every possible path in the parse table, but only visit states that + // are part of the subgraph for the current symbol. for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { - TSStateId successor_state = ts_language_next_state(self->language, state, sym); - if (successor_state && successor_state != state) { + TSStateId successor_state = ts_language_next_state(self->language, parse_state, sym); + if (successor_state && successor_state != parse_state) { unsigned node_index; array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); while (exists && node_index < subgraph->nodes.size) { - SubgraphNode *node = &subgraph->nodes.contents[node_index++]; + AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; if (node->state != successor_state) break; if (node->child_index != child_index + 1) continue; - TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + // Use the subgraph to determine what alias and field will eventually be applied + // to this child node. + TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); TSSymbol visible_symbol = alias ? alias : self->language->symbol_metadata[sym].visible ? 
self->language->public_symbol_map[sym] : 0; - TSFieldId field_id = parent_field_id; if (!field_id) { const TSFieldMapEntry *field_map, *field_map_end; @@ -977,11 +988,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - WalkState next_walk_state = *walk_state; - walk_state__top(&next_walk_state)->child_index++; - walk_state__top(&next_walk_state)->state = successor_state; - if (node->done) walk_state__top(&next_walk_state)->done = true; + AnalysisState next_state = *state; + analysis_state__top(&next_state)->child_index++; + analysis_state__top(&next_state)->parse_state = successor_state; + if (node->done) analysis_state__top(&next_state)->done = true; + // Determine if this hypothetical child node would match the current step + // of the query pattern. bool does_match = false; if (visible_symbol) { does_match = true; @@ -993,70 +1006,75 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index if (step->field && step->field != field_id) { does_match = false; } - } else if (next_walk_state.depth < MAX_WALK_STATE_DEPTH) { - next_walk_state.depth++; - walk_state__top(&next_walk_state)->state = state; - walk_state__top(&next_walk_state)->child_index = 0; - walk_state__top(&next_walk_state)->parent_symbol = sym; - walk_state__top(&next_walk_state)->field_id = field_id; - walk_state__top(&next_walk_state)->done = false; + } + + // If this is a hidden child, then push a new entry to the stack, in order to + // walk through the children of this child. 
+ else if (next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { + next_state.depth++; + analysis_state__top(&next_state)->parse_state = parse_state; + analysis_state__top(&next_state)->child_index = 0; + analysis_state__top(&next_state)->parent_symbol = sym; + analysis_state__top(&next_state)->field_id = field_id; + analysis_state__top(&next_state)->done = false; } else { continue; } - while (next_walk_state.depth > 0 && walk_state__top(&next_walk_state)->done) { - memset(walk_state__top(&next_walk_state), 0, sizeof(WalkStateEntry)); - next_walk_state.depth--; + // Pop from the stack when this state reached the end of its current syntax node. + while (next_state.depth > 0 && analysis_state__top(&next_state)->done) { + next_state.depth--; } + // If this hypothetical child did match the current step of the query pattern, + // then advance to the next step at the current depth. This involves skipping + // over any descendant steps of the current child. + const QueryStep *next_step = step; if (does_match) { for (;;) { - next_walk_state.step_index++; - const QueryStep *step = &self->steps.contents[next_walk_state.step_index]; + next_state.step_index++; + next_step = &self->steps.contents[next_state.step_index]; if ( - step->depth == PATTERN_DONE_MARKER || - step->depth == parent_depth - ) { - can_finish_pattern = true; - break; - } - if (step->depth == parent_depth + 1) { - break; - } + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= parent_depth + 1 + ) break; } } - if ( - next_walk_state.depth == 0 || - self->steps.contents[next_walk_state.step_index].depth != parent_depth + 1 - ) { - array_insert_sorted_by(&final_step_indices, 0, , next_walk_state.step_index); - } else { - for (;;) { - const QueryStep *step = &self->steps.contents[next_walk_state.step_index]; - if (!step->is_dead_end) { - array_insert_sorted_with(&next_walk_states, 0, walk_state__compare, next_walk_state); - } - if (step->alternative_index != NONE && step->alternative_index > 
next_walk_state.step_index) { - next_walk_state.step_index = step->alternative_index; + for (;;) { + // If this state can make further progress, then add it to the states for the next iteration. + // Otherwise, record the fact that matching can fail at this step of the pattern. + if (!next_step->is_dead_end) { + bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; + if (did_finish_pattern) can_finish_pattern = true; + if (next_state.depth > 0 && !did_finish_pattern) { + array_insert_sorted_with(&next_states, 0, analysis_state__compare, next_state); } else { - break; + array_insert_sorted_by(&final_step_indices, 0, , next_state.step_index); } } + + // If the state has advanced to a step with an alternative step, then add another state at + // that alternative step to the next iteration. + if ( + does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > next_state.step_index + ) { + next_state.step_index = next_step->alternative_index; + next_step = &self->steps.contents[next_state.step_index]; + } else { + break; + } } } } } } - for (; j < walk_states.size; j++) { - WalkState *walk_state = &walk_states.contents[j]; - array_push(&next_walk_states, *walk_state); - } - - WalkStateList _walk_states = walk_states; - walk_states = next_walk_states; - next_walk_states = _walk_states; + AnalysisStateList _states = states; + states = next_states; + next_states = _states; } // A query step is definite if the containing pattern will definitely match @@ -1111,25 +1129,24 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - // Debug - // { - // printf("\nSteps:\n"); - // for (unsigned i = 0; i < self->steps.size; i++) { - // QueryStep *step = &self->steps.contents[i]; - // if (step->depth == PATTERN_DONE_MARKER) { - // printf(" %u: DONE\n", i); - // } else { - // printf( - // " %u: {symbol: %s, is_definite: %d}\n", - // i, - // (step->symbol == WILDCARD_SYMBOL || 
step->symbol == NAMED_WILDCARD_SYMBOL) - // ? "ANY" - // : ts_language_symbol_name(self->language, step->symbol), - // step->is_definite - // ); - // } - // } - // } + #ifdef DEBUG_ANALYZE_QUERY + printf("Steps:\n"); + for (unsigned i = 0; i < self->steps.size; i++) { + QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) { + printf(" %u: DONE\n", i); + } else { + printf( + " %u: {symbol: %s, is_definite: %d}\n", + i, + (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) + ? "ANY" + : ts_language_symbol_name(self->language, step->symbol), + step->is_definite + ); + } + } + #endif // Cleanup for (unsigned i = 0; i < subgraphs.size; i++) { @@ -1138,8 +1155,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } array_delete(&subgraphs); array_delete(&next_nodes); - array_delete(&walk_states); - array_delete(&next_walk_states); + array_delete(&states); + array_delete(&next_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); state_predecessor_map_delete(&predecessor_map); From 997ef45992c2bdf33927fdff65c56fb11dc6ab6c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 26 Jun 2020 15:05:10 -0700 Subject: [PATCH 090/282] Handle parent nodes with simple aliases in query analysis --- lib/src/query.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index cf84115f9e..bf781204cf 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -755,11 +755,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index for (unsigned i = 0; i < count; i++) { const TSParseAction *action = &actions[i]; if (action->type == TSParseActionTypeReduce) { + TSSymbol symbol = self->language->public_symbol_map[action->params.reduce.symbol]; array_search_sorted_by( &subgraphs, 0, .symbol, - action->params.reduce.symbol, + symbol, &subgraph_index, &exists ); @@ -784,11 +785,12 @@ static bool 
ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index TSStateId next_state = ts_language_next_state(self->language, state, sym); if (next_state != 0) { state_predecessor_map_add(&predecessor_map, next_state, state); + TSSymbol symbol = self->language->public_symbol_map[sym]; array_search_sorted_by( &subgraphs, subgraph_index, .symbol, - sym, + symbol, &subgraph_index, &exists ); @@ -850,7 +852,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); for (unsigned j = 0; j < subgraph->nodes.size; j++) { AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; - printf(" {state: %u, child_index: %u}\n", node->state, node->child_index); + printf(" {state: %u, child_index: %u, production_id: %u}\n", node->state, node->child_index, node->production_id); } printf("\n"); } From a317199215f1bf1848f12c57ee3713c721a4a392 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 26 Jun 2020 15:05:27 -0700 Subject: [PATCH 091/282] Add query construction to benchmark --- cli/benches/benchmark.rs | 122 ++++++++++++++++++++++++--------------- script/benchmark | 20 ++++++- 2 files changed, 94 insertions(+), 48 deletions(-) diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index 50ee5370e1..53ab3fea23 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -2,8 +2,8 @@ use lazy_static::lazy_static; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::Instant; -use std::{env, fs, usize}; -use tree_sitter::{Language, Parser}; +use std::{env, fs, str, usize}; +use tree_sitter::{Language, Parser, Query}; use tree_sitter_cli::error::Error; use tree_sitter_cli::loader::Loader; @@ -18,26 +18,33 @@ lazy_static! 
{ .map(|s| usize::from_str_radix(&s, 10).unwrap()) .unwrap_or(5); static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); - static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, Vec<PathBuf>> = { - fn process_dir(result: &mut BTreeMap<PathBuf, Vec<PathBuf>>, dir: &Path) { + static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = { + fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) { if dir.join("grammar.js").exists() { let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); + let (example_paths, query_paths) = + result.entry(relative_path.to_owned()).or_default(); + if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { - result.insert( - relative_path.to_owned(), - example_files - .filter_map(|p| { - let p = p.unwrap().path(); - if p.is_file() { - Some(p) - } else { - None - } - }) - .collect(), - ); - } else { - result.insert(relative_path.to_owned(), Vec::new()); + example_paths.extend(example_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); + } + + if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + query_paths.extend(query_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); } } else { for entry in fs::read_dir(&dir).unwrap() { @@ -56,20 +63,25 @@ lazy_static!
{ } fn main() { - let mut parser = Parser::new(); - let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_DIR - .iter() - .flat_map(|(_, paths)| paths.iter()) - .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR + .values() + .flat_map(|(e, q)| { + e.iter() + .chain(q.iter()) + .map(|s| s.file_name().unwrap().to_str().unwrap().len()) + }) .max() - .unwrap(); + .unwrap_or(0); + eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); + + let mut parser = Parser::new(); let mut all_normal_speeds = Vec::new(); let mut all_error_speeds = Vec::new(); - eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); - - for (language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (language_path, (example_paths, query_paths)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { let language_name = language_path.file_name().unwrap().to_str().unwrap(); if let Some(filter) = LANGUAGE_FILTER.as_ref() { @@ -79,9 +91,24 @@ fn main() { } eprintln!("\nLanguage: {}", language_name); - parser.set_language(get_language(language_path)).unwrap(); + let language = get_language(language_path); + parser.set_language(language).unwrap(); + + eprintln!(" Constructing Queries"); + for path in query_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + parse(&path, max_path_length, |source| { + Query::new(language, str::from_utf8(source).unwrap()) + .expect("Failed to parse query"); + }); + } - eprintln!(" Normal examples:"); + eprintln!(" Parsing Valid Code:"); let mut normal_speeds = Vec::new(); for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -90,12 +117,16 @@ fn main() { } } - normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + normal_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to 
parse"); + })); } - eprintln!(" Error examples (mismatched languages):"); + eprintln!(" Parsing Invalid Code (mismatched languages):"); let mut error_speeds = Vec::new(); - for (other_language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (other_language_path, (example_paths, _)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { if other_language_path != language_path { for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -104,7 +135,9 @@ fn main() { } } - error_speeds.push(parse(&mut parser, example_path, max_path_length)); + error_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to parse"); + })); } } } @@ -123,7 +156,7 @@ fn main() { all_error_speeds.extend(error_speeds); } - eprintln!("\nOverall"); + eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); @@ -151,28 +184,25 @@ fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { Some((total / speeds.len(), max)) } -fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { +fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize { eprint!( " {:width$}\t", - example_path.file_name().unwrap().to_str().unwrap(), + path.file_name().unwrap().to_str().unwrap(), width = max_path_length ); - let source_code = fs::read(example_path) - .map_err(Error::wrap(|| format!("Failed to read {:?}", example_path))) + let source_code = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read {:?}", path))) .unwrap(); let time = Instant::now(); for _ in 0..*REPETITION_COUNT { - parser - .parse(&source_code, None) - .expect("Incompatible language version"); + action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = - duration.as_secs() 
as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; - let speed = (source_code.len() as f64 / duration_ms) as usize; + let duration_ms = duration.as_millis(); + let speed = source_code.len() as u128 / (duration_ms + 1); eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); - speed + speed as usize } fn get_language(path: &Path) -> Language { diff --git a/script/benchmark b/script/benchmark index 61e57920b8..7599e98967 100755 --- a/script/benchmark +++ b/script/benchmark @@ -18,15 +18,22 @@ OPTIONS -r parse each sample the given number of times (default 5) + -g debug + EOF } -while getopts "hl:e:r:" option; do +mode=normal + +while getopts "hgl:e:r:" option; do case ${option} in h) usage exit ;; + g) + mode=debug + ;; e) export TREE_SITTER_BENCHMARK_EXAMPLE_FILTER=${OPTARG} ;; @@ -39,4 +46,13 @@ while getopts "hl:e:r:" option; do esac done -cargo bench benchmark +if [[ "${mode}" == "debug" ]]; then + test_binary=$( + cargo bench benchmark --no-run --message-format=json 2> /dev/null |\ + jq -rs 'map(select(.target.name == "benchmark" and .executable))[0].executable' + ) + env | grep TREE_SITTER + echo $test_binary +else + exec cargo bench benchmark +fi From 645aacb1e7b8a02cf7badaf90e08d77350daa74f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 26 Jun 2020 15:40:34 -0700 Subject: [PATCH 092/282] Optimize query analysis using binary search --- lib/src/array.h | 47 +++++++++++++++++++++++++---------------------- lib/src/query.c | 24 +++++++++++++++--------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index e95867cf3a..7fae7a4055 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -66,43 +66,46 @@ extern "C" { #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) -#define array_search_sorted_by(self, start, field, needle, out_index, out_exists) \ +#define array__search_sorted(self, start, compare, suffix, 
needle, index, exists) \ do { \ - *(out_exists) = false; \ - for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ - int _comparison = (int)((self)->contents[*(out_index)] field) - (int)(needle); \ - if (_comparison >= 0) { \ - if (_comparison == 0) *(out_exists) = true; \ - break; \ - } \ + *(index) = start; \ + *(exists) = false; \ + uint32_t size = (self)->size - *(index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(index) = mid_index; \ + size -= half_size; \ } \ - } while (0); + comparison = compare(&((self)->contents[*(index)] suffix), (needle)); \ + if (comparison == 0) *(exists) = true; \ + else if (comparison < 0) *(index) += 1; \ + } while (0) -#define array_search_sorted_with(self, start, compare, needle, out_index, out_exists) \ - do { \ - *(out_exists) = false; \ - for (*(out_index) = start; *(out_index) < (self)->size; (*(out_index))++) { \ - int _comparison = compare(&(self)->contents[*(out_index)], (needle)); \ - if (_comparison >= 0) { \ - if (_comparison == 0) *(out_exists) = true; \ - break; \ - } \ - } \ - } while (0); +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#define array_search_sorted_by(self, start, field, needle, index, exists) \ + array__search_sorted(self, start, _compare_int, field, needle, index, exists) + +#define array_search_sorted_with(self, start, compare, needle, index, exists) \ + array__search_sorted(self, start, compare, , needle, index, exists) #define array_insert_sorted_by(self, start, field, value) \ do { \ unsigned index, exists; \ array_search_sorted_by(self, start, field, (value) field, &index, &exists); \ if (!exists) array_insert(self, index, value); \ - } while (0); + } while (0) #define array_insert_sorted_with(self, start, compare, value) \ do { \ unsigned index, exists; \ 
array_search_sorted_with(self, start, compare, &(value), &index, &exists); \ if (!exists) array_insert(self, index, value); \ - } while (0); + } while (0) // Private diff --git a/lib/src/query.c b/lib/src/query.c index bf781204cf..64a1b8a005 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -612,10 +612,10 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { if (self->state < other->state) return -1; if (self->state > other->state) return 1; - if (self->done && !other->done) return -1; - if (!self->done && other->done) return 1; if (self->child_index < other->child_index) return -1; if (self->child_index > other->child_index) return 1; + if (self->done < other->done) return -1; + if (self->done > other->done) return 1; if (self->production_id < other->production_id) return -1; if (self->production_id > other->production_id) return 1; return 0; @@ -961,14 +961,20 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // Follow every possible path in the parse table, but only visit states that // are part of the subgraph for the current symbol. 
for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { - TSStateId successor_state = ts_language_next_state(self->language, parse_state, sym); - if (successor_state && successor_state != parse_state) { + AnalysisSubgraphNode successor = { + .state = ts_language_next_state(self->language, parse_state, sym), + .child_index = child_index + 1, + }; + if (successor.state && successor.state != parse_state) { unsigned node_index; - array_search_sorted_by(&subgraph->nodes, 0, .state, successor_state, &node_index, &exists); - while (exists && node_index < subgraph->nodes.size) { + array_search_sorted_with( + &subgraph->nodes, 0, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor_state) break; - if (node->child_index != child_index + 1) continue; + if (node->state != successor.state || node->child_index != successor.child_index) break; // Use the subgraph to determine what alias and field will eventually be applied // to this child node. 
@@ -992,7 +998,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index AnalysisState next_state = *state; analysis_state__top(&next_state)->child_index++; - analysis_state__top(&next_state)->parse_state = successor_state; + analysis_state__top(&next_state)->parse_state = successor.state; if (node->done) analysis_state__top(&next_state)->done = true; // Determine if this hypothetical child node would match the current step From cc37da7457da79795e47a41878342758b443004b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 26 Jun 2020 16:31:08 -0700 Subject: [PATCH 093/282] Query analysis: fix propagation of uncertainty from later siblings --- cli/src/tests/query_test.rs | 89 ++++++++++++++++++++++------------- lib/binding_rust/bindings.rs | 1 + lib/binding_rust/lib.rs | 4 +- lib/include/tree_sitter/api.h | 1 + lib/src/query.c | 71 ++++++++++++++++------------ 5 files changed, 99 insertions(+), 67 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c73931ce89..5f6979a264 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2079,90 +2079,111 @@ fn test_query_is_definite() { struct Row { language: Language, pattern: &'static str, - results_by_step_index: &'static [(usize, bool)], + results_by_symbol: &'static [(&'static str, bool)], } let rows = &[ Row { language: get_language("python"), pattern: r#"(expression_statement (string))"#, - results_by_step_index: &[ - (0, false), - (1, false), // string + results_by_symbol: &[ + ("expression_statement", false), + ("string", false), ], }, Row { language: get_language("javascript"), pattern: r#"(expression_statement (string))"#, - results_by_step_index: &[ - (0, false), - (1, false), // string + results_by_symbol: &[ + ("expression_statement", false), + ("string", false), // string ], }, Row { language: get_language("javascript"), pattern: r#"(object "{" "}")"#, - results_by_step_index: &[ - (0, false), - (1, true), // "{" - (2, 
true), // "}" + results_by_symbol: &[ + ("object", false), + ("{", true), + ("}", true), ], }, Row { language: get_language("javascript"), pattern: r#"(pair (property_identifier) ":")"#, - results_by_step_index: &[ - (0, false), - (1, false), // property_identifier - (2, true), // ":"" + results_by_symbol: &[ + ("pair", false), + ("property_identifier", false), + (":", true), ], }, Row { language: get_language("javascript"), pattern: r#"(object "{" (_) "}")"#, - results_by_step_index: &[ - (0, false), - (1, false), // "{"" - (2, false), // (_) - (3, true), // "}" + results_by_symbol: &[ + ("object", false), + ("{", false), + ("", false), + ("}", true), ], }, Row { - // Named wildcards, fields language: get_language("javascript"), pattern: r#"(binary_expression left: (identifier) right: (_))"#, - results_by_step_index: &[ - (0, false), - (1, false), // identifier - (2, true), // (_) + results_by_symbol: &[ + ("binary_expression", false), + ("identifier", false), + ("", true), ], }, Row { language: get_language("javascript"), pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, - results_by_step_index: &[ - (0, false), - (1, true), // identifier - (2, true), // statement_block + results_by_symbol: &[ + ("function_declaration", false), + ("identifier", true), + ("statement_block", true), + ], + }, + Row { + language: get_language("javascript"), + pattern: r#" + (function_declaration + name: (identifier) + body: (statement_block "{" (expression_statement) "}"))"#, + results_by_symbol: &[ + ("function_declaration", false), + ("identifier", false), + ("statement_block", false), + ("{", false), + ("expression_statement", false), + ("}", true), ], }, Row { language: get_language("javascript"), pattern: r#""#, - results_by_step_index: &[], + results_by_symbol: &[], }, ]; allocations::record(|| { for row in rows.iter() { let query = Query::new(row.language, row.pattern).unwrap(); - for (step_index, is_definite) in row.results_by_step_index { + for 
(symbol_name, is_definite) in row.results_by_symbol { + let mut symbol = 0; + if !symbol_name.is_empty() { + symbol = row.language.id_for_node_kind(symbol_name, true); + if symbol == 0 { + symbol = row.language.id_for_node_kind(symbol_name, false); + } + } assert_eq!( - query.pattern_is_definite(0, *step_index), + query.pattern_is_definite(0, symbol, 0), *is_definite, - "Pattern: {:?}, step: {}, expected is_definite to be {}", + "Pattern: {:?}, symbol: {}, expected is_definite to be {}", row.pattern, - step_index, + symbol_name, is_definite, ) } diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 167edebf5f..b5ff7a9e16 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -654,6 +654,7 @@ extern "C" { pub fn ts_query_pattern_is_definite( self_: *const TSQuery, pattern_index: u32, + symbol: TSSymbol, step_index: u32, ) -> bool; } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index d3284974a1..b4d6f8c504 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1467,9 +1467,9 @@ impl Query { /// Check if a pattern will definitely match after a certain number of steps /// have matched. 
- pub fn pattern_is_definite(&self, index: usize, step_index: usize) -> bool { + pub fn pattern_is_definite(&self, pattern_index: usize, symbol: u16, step_index: usize) -> bool { unsafe { - ffi::ts_query_pattern_is_definite(self.ptr.as_ptr(), index as u32, step_index as u32) + ffi::ts_query_pattern_is_definite(self.ptr.as_ptr(), pattern_index as u32, symbol, step_index as u32) } } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1abbf28cef..850cd31ea9 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -722,6 +722,7 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( bool ts_query_pattern_is_definite( const TSQuery *self, uint32_t pattern_index, + TSSymbol symbol, uint32_t step_index ); diff --git a/lib/src/query.c b/lib/src/query.c index 64a1b8a005..dd6ad8c007 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -149,8 +149,7 @@ typedef struct { /* * AnalysisState - The state needed for walking the parse table when analyzing - * a query pattern, to determine the steps where the pattern could fail - * to match. + * a query pattern, to determine at which steps the pattern might fail to match. */ typedef struct { TSStateId parse_state; @@ -166,6 +165,12 @@ typedef struct { uint16_t step_index; } AnalysisState; +/* + * AnalysisSubgraph - A subset of the states in the parse table that are used + * in constructing nodes with a certain symbol. Each state is accompanied by + * some information about the possible node that could be produced in + * downstream states. + */ typedef struct { TSStateId state; uint8_t production_id; @@ -914,7 +919,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index " {parent: %s, child_index: %u, field: %s, state: %3u, done:%d}", self->language->symbol_names[state->stack[k].parent_symbol], state->stack[k].child_index, - self->language->field_names[state->stack[k].field_id], + state->stack[k].field_id ? 
self->language->field_names[state->stack[k].field_id] : "", state->stack[k].parse_state, state->stack[k].done ); @@ -1018,7 +1023,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // If this is a hidden child, then push a new entry to the stack, in order to // walk through the children of this child. - else if (next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { + else if (sym >= self->language->token_count && next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { next_state.depth++; analysis_state__top(&next_state)->parse_state = parse_state; analysis_state__top(&next_state)->child_index = 0; @@ -1122,17 +1127,29 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } } - // In order for a parent step to be definite, all of its child steps must - // be definite. Propagate the definiteness up the pattern trees by walking - // the query's steps in reverse. + // In order for a step to be definite, all of its child steps must be definite, + // and all of its later sibling steps must be definite. Propagate any indefiniteness + // upward and backward through the pattern trees. 
for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) { QueryStep *step = &self->steps.contents[i]; - for (unsigned j = i + 1; j < self->steps.size; j++) { + bool all_later_children_definite = true; + unsigned end_step_index = i + 1; + while (end_step_index < self->steps.size) { + QueryStep *child_step = &self->steps.contents[end_step_index]; + if (child_step->depth <= step->depth || child_step->depth == PATTERN_DONE_MARKER) break; + end_step_index++; + } + for (unsigned j = end_step_index - 1; j > i; j--) { QueryStep *child_step = &self->steps.contents[j]; - if (child_step->depth <= step->depth) break; - if (child_step->depth == step->depth + 1 && !child_step->is_definite) { - step->is_definite = false; - break; + if (child_step->depth == step->depth + 1) { + if (all_later_children_definite) { + if (!child_step->is_definite) { + all_later_children_definite = false; + step->is_definite = false; + } + } else { + child_step->is_definite = false; + } } } } @@ -1870,29 +1887,21 @@ uint32_t ts_query_start_byte_for_pattern( bool ts_query_pattern_is_definite( const TSQuery *self, uint32_t pattern_index, - uint32_t step_count + TSSymbol symbol, + uint32_t index ) { uint32_t step_index = self->patterns.contents[pattern_index].start_step; - for (;;) { - QueryStep *start_step = &self->steps.contents[step_index]; - if (step_index + step_count < self->steps.size) { - QueryStep *step = start_step; - for (unsigned i = 0; i < step_count; i++) { - if (step->depth == PATTERN_DONE_MARKER) { - step = NULL; - break; - } - step++; - } - if (step && !step->is_definite) return false; - } - if (start_step->alternative_index != NONE && start_step->alternative_index > step_index) { - step_index = start_step->alternative_index; - } else { - break; + QueryStep *step = &self->steps.contents[step_index]; + for (; step->depth != PATTERN_DONE_MARKER; step++) { + bool does_match = symbol ? 
+ step->symbol == symbol : + step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL; + if (does_match) { + if (index == 0) return step->is_definite; + index--; } } - return true; + return false; } void ts_query_disable_capture( From 0438ed03ffbb4db86283ae3fcea3529971f1715b Mon Sep 17 00:00:00 2001 From: intrigus-lgtm <60750685+intrigus-lgtm@users.noreply.github.com> Date: Mon, 6 Jul 2020 22:47:10 +0200 Subject: [PATCH 094/282] Fix wrong file name (#666) "build_fuzzers" -> "build-fuzzers". It should be a hyphen and not an underscore. --- test/fuzz/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/fuzz/README.md b/test/fuzz/README.md index 649d2d895c..a02d268915 100644 --- a/test/fuzz/README.md +++ b/test/fuzz/README.md @@ -22,10 +22,10 @@ The fuzzers can then be built with: export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \ LIB_FUZZER_PATH=$HOME/src/compiler-rt/lib/fuzzer/libFuzzer.a \ - ./script/build_fuzzers + ./script/build-fuzzers ``` -This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build_fuzzers python ruby`. +This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`. 
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments: ``` From 0bf2450b4aa26e79d9fcb1e2007e183ff14d2424 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 6 Jul 2020 15:56:21 -0700 Subject: [PATCH 095/282] Always enforce stack version limit during reductions Fixes #669 --- cli/src/tests/mod.rs | 1 + cli/src/tests/pathological_test.rs | 15 +++++++++++++++ lib/src/parser.c | 18 ++++++++++++------ 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 cli/src/tests/pathological_test.rs diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index ac54db004d..24e8160efb 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,6 +3,7 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; +mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs new file mode 100644 index 0000000000..7ebd543911 --- /dev/null +++ b/cli/src/tests/pathological_test.rs @@ -0,0 +1,15 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_pathological_example_1() { + let language = "cpp"; + let source = r#"*ss(qqXstack); - uint32_t removed_version_count = 0; - StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + // Pop the given number of nodes from the given version of the parse stack. + // If stack versions have previously merged, then there may be more than one + // path back through the stack. For each path, create a new parent node to + // contain the popped children, and push it onto the stack in place of the + // children. 
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); + uint32_t removed_version_count = 0; for (uint32_t i = 0; i < pop.size; i++) { StackSlice slice = pop.contents[i]; StackVersion slice_version = slice.version - removed_version_count; - // Error recovery can sometimes cause lots of stack versions to merge, - // such that a single pop operation can produce a lots of slices. - // Avoid creating too many stack versions in that situation. - if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { + // This is where new versions are added to the parse stack. The versions + // will all be sorted and truncated at the end of the outer parsing loop. + // Allow the maximum version count to be temporarily exceeded, but only + // by a limited threshold. + if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) { ts_stack_remove_version(self->stack, slice_version); ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); removed_version_count++; From 86a5dabbcbdac650c53a889183bf56d7e721e09e Mon Sep 17 00:00:00 2001 From: Jacob Gillespie Date: Tue, 7 Jul 2020 16:45:23 +0100 Subject: [PATCH 096/282] Add TypeScript definition for DSL (#658) --- cli/npm/dsl.d.ts | 356 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 cli/npm/dsl.d.ts diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts new file mode 100644 index 0000000000..b9bf1c9814 --- /dev/null +++ b/cli/npm/dsl.d.ts @@ -0,0 +1,356 @@ +type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; +type BlankRule = {type: 'BLANK'}; +type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; +type FieldRule = {type: 'FIELD'; name: string; content: Rule}; +type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; +type PatternRule = {type: 'PATTERN'; value: string}; +type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; +type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: 
number}; +type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; +type PrecRule = {type: 'PREC'; content: Rule; value: number}; +type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; +type RepeatRule = {type: 'REPEAT'; content: Rule}; +type SeqRule = {type: 'SEQ'; members: Rule[]}; +type StringRule = {type: 'STRING'; value: string}; +type SymbolRule = {type: 'SYMBOL'; name: Name}; +type TokenRule = {type: 'TOKEN'; content: Rule}; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols = { + [name in RuleName]: SymbolRule; +} & + Record>; + +type RuleBuilder = ( + $: GrammarSymbols, +) => RuleOrLiteral; + +type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder; +}; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. + */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. 
+ * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record>, + previous: Rule[], + ) => SymbolRule[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar. + * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. + * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. 
+ * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols) => RuleOrLiteral; +} + +type GrammarSchema = { + [K in keyof Grammar]: K extends 'rules' + ? Record + : Grammar[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. + * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. 
+ * + * @param value rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at parser-generation time. When two rules overlap + * in a way that represents either a true ambiguity or a _local_ ambiguity + * given one token of lookahead, Tree-sitter will try to resolve the conflict by + * matching the rule with the higher precedence. The default precedence of all + * rules is zero. This works similarly to the precedence directives in Yacc grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (number: number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(number: number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. 
This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(number: number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. 
+ * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym(name: Name): SymbolRule; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. + * + * @param rule rule to represent as a single token + */ +declare const token: { + (rule: RuleOrLiteral): TokenRule; + + /** + * Marks the given rule as producing an immediate token. This allows + * the parser to produce a different token based on whether or not + * there are `extras` preceding the token's main content. When there + * are _no_ leading `extras`, an immediate token is preferred over a + * normal token which would otherwise match. + * + * @param rule rule to represent as an immediate token + */ + immediate(rule: RuleOrLiteral): ImmediateTokenRule; +}; + +/** + * Creates a new language grammar with the provided schema. + * + * @param options grammar options + */ +declare function grammar( + options: Grammar, +): GrammarSchema; + +/** + * Extends an existing language grammar with the provided options, + * creating a new language. 
+ * + * @param baseGrammar base grammar schema to extend from + * @param options grammar options for the new extended language + */ +declare function grammar< + BaseGrammarRuleName extends string, + RuleName extends string +>( + baseGrammar: GrammarSchema, + options: Grammar, +): GrammarSchema; From d614c14c2cfc5911674f233ba7073c3dc3a90fdd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 12:36:59 -0700 Subject: [PATCH 097/282] tags: Make spans refer to name, not entire def/ref Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 23 ++++++++++++++++++----- tags/src/lib.rs | 12 +++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index fad8ebd866..f3df4b5350 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -2,6 +2,7 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use tree_sitter::Point; use tree_sitter_tags::c_lib as c; use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; @@ -150,12 +151,24 @@ fn test_tags_javascript() { assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| (substr(source, &t.name_range), t.span.clone(), t.kind)) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ( + "Customer", + Point::new(5, 10)..Point::new(5, 18), + TagKind::Class + ), + ( + "getAge", + Point::new(9, 8)..Point::new(9, 14), + TagKind::Method + ), + ( + "Agent", + Point::new(15, 10)..Point::new(15, 15), + TagKind::Class + ) ] ); assert_eq!( @@ -209,7 +222,7 @@ fn test_tags_ruby() { )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), + ("foo", TagKind::Method, (2, 4)), ("bar", TagKind::Call, (7, 4)), ("a", TagKind::Call, (7, 8)), ("b", TagKind::Call, (7, 11)), diff --git a/tags/src/lib.rs 
b/tags/src/lib.rs index 8d1853bb95..613e56ac62 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -300,7 +300,7 @@ where continue; } - let mut name_range = None; + let mut name_node = None; let mut doc_nodes = Vec::new(); let mut tag_node = None; let mut kind = TagKind::Call; @@ -314,7 +314,7 @@ where } if index == self.config.name_capture_index { - name_range = Some(capture.node.byte_range()); + name_node = Some(capture.node); } else if index == self.config.doc_capture_index { doc_nodes.push(capture.node); } else if index == self.config.call_capture_index { @@ -335,7 +335,9 @@ where } } - if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) { + if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { + let name_range = name_node.byte_range(); + if pattern_info.name_must_be_non_local { let mut is_local = false; for scope in self.scopes.iter().rev() { @@ -413,7 +415,7 @@ where *pattern_index = mat.pattern_index; *tag = Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, @@ -426,7 +428,7 @@ where ( Tag { line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: tag_node.start_position()..tag_node.end_position(), + span: name_node.start_position()..name_node.end_position(), kind, range, name_range, From 255cf0a9cfe58654a40fd166dcbc3a0849073a22 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 8 Jul 2020 15:23:21 -0700 Subject: [PATCH 098/282] tags: Add utf16 column ranges to tags Also, ensure that line ranges contain only valid UTF8. 
Co-authored-by: Tim Clem Co-authored-by: Beka Valentine --- cli/src/tests/tags_test.rs | 42 +++++++--- tags/src/lib.rs | 159 +++++++++++++++++++++++++++---------- 2 files changed, 148 insertions(+), 53 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f3df4b5350..c81f696679 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -8,21 +8,21 @@ use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( - (function_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (function_definition name: (identifier) @name) @function ( - (class_definition - name: (identifier) @name - body: (block - . (expression_statement (string) @doc))) @class - (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @class + (#strip! 
@doc "(^['\"\\s]*)|(['\"\\s]*$)") ) (class_definition @@ -30,6 +30,10 @@ const PYTHON_TAG_QUERY: &'static str = r#" (call function: (identifier) @name) @call + +(call + function: (attribute + attribute: (identifier) @name)) @call "#; const JS_TAG_QUERY: &'static str = r#" @@ -179,6 +183,26 @@ fn test_tags_javascript() { assert_eq!(tags[2].docs, None); } +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + #[test] fn test_tags_ruby() { let language = get_language("ruby"); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 613e56ac62..a240666f31 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,10 +1,10 @@ pub mod c_lib; -use memchr::{memchr, memrchr}; +use memchr::memchr; use regex::Regex; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{fmt, mem, str}; +use std::{char, fmt, mem, str}; use tree_sitter::{ Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; @@ -43,6 +43,7 @@ pub struct Tag { pub name_range: Range, pub line_range: Range, pub span: Range, + pub utf16_column_range: Range, pub docs: Option, } @@ -404,39 +405,32 @@ where // Only create one tag per node. The tag queue is sorted by node position // to allow for fast lookup. 
let range = tag_node.byte_range(); - match self - .tag_queue - .binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| { - (tag.name_range.end, tag.name_range.start) - }) { + let span = name_node.start_position()..name_node.end_position(); + let utf16_column_range = + get_utf16_column_range(self.source, &name_range, &span); + let line_range = + line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + let tag = Tag { + line_range, + span, + utf16_column_range, + kind, + range, + name_range, + docs, + }; + match self.tag_queue.binary_search_by_key( + &(tag.name_range.end, tag.name_range.start), + |(tag, _)| (tag.name_range.end, tag.name_range.start), + ) { Ok(i) => { - let (tag, pattern_index) = &mut self.tag_queue[i]; + let (existing_tag, pattern_index) = &mut self.tag_queue[i]; if *pattern_index > mat.pattern_index { *pattern_index = mat.pattern_index; - *tag = Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }; + *existing_tag = tag; } } - Err(i) => self.tag_queue.insert( - i, - ( - Tag { - line_range: line_range(self.source, range.start, MAX_LINE_LEN), - span: name_node.start_position()..name_node.end_position(), - kind, - range, - name_range, - docs, - }, - mat.pattern_index, - ), - ), + Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)), } } } @@ -475,11 +469,92 @@ impl From for Error { } } -fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range { - let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1); - let max_line_len = max_line_len.min(text.len() - start); - let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len); - start..end +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + 
+impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + +fn line_range( + text: &[u8], + start_byte: usize, + start_point: Point, + max_line_len: usize, +) -> Range { + let line_start_byte = start_byte - start_point.column; + let max_line_len = max_line_len.min(text.len() - line_start_byte); + let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)]; + let len = if let Some(len) = memchr(b'\n', text_after_line_start) { + len + } else { + match str::from_utf8(text_after_line_start) { + Ok(s) => s.len(), + Err(e) => e.valid_up_to(), + } + }; + line_start_byte..(line_start_byte + len) +} + +fn get_utf16_column_range( + text: &[u8], + byte_range: &Range, + point_range: &Range, +) -> Range { + let start = byte_range.start - point_range.start.column; + let preceding_text_on_line = &text[start..byte_range.start]; + let start_col = utf16_len(preceding_text_on_line); + start_col..(start_col + utf16_len(&text[byte_range.clone()])) +} + +fn utf16_len(bytes: &[u8]) -> usize { + LossyUtf8::new(bytes) + .flat_map(|chunk| chunk.chars().map(char::len_utf16)) + .sum() } #[cfg(test)] @@ -488,14 +563,10 @@ mod tests { #[test] fn test_get_line() { - let text = b"abc\ndefg\nhijkl"; - assert_eq!(line_range(text, 0, 10), 0..3); - assert_eq!(line_range(text, 1, 10), 
0..3); - assert_eq!(line_range(text, 2, 10), 0..3); - assert_eq!(line_range(text, 3, 10), 0..3); - assert_eq!(line_range(text, 1, 2), 0..2); - assert_eq!(line_range(text, 4, 10), 4..8); - assert_eq!(line_range(text, 5, 10), 4..8); - assert_eq!(line_range(text, 11, 10), 9..14); + let text = "abc\ndefg❤hij\nklmno".as_bytes(); + assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14); + assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8); + assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20); + assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19); } } From e9ea8192a3428a9a204167c27e7d0a76cbd4efd8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 09:11:34 -0700 Subject: [PATCH 099/282] Mention node version >= 6 in docs Fixes #677 --- docs/section-3-creating-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index c877ba6f7d..b075e48873 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -13,7 +13,7 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo In order to develop a Tree-sitter parser, there are two dependencies that you need to install: -* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have. +* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater. * **A C Compiler** - Tree-sitter creates parsers that are written in C. 
In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform. ### Installation From b52f28d6d5d740a85e539cde221b6742106f488f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:28:07 -0700 Subject: [PATCH 100/282] Allow measuring time for tags subcommand --- cli/src/main.rs | 20 ++++++++++++--- cli/src/tags.rs | 67 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 757c70eb6d..d7a5e7b1f1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -97,6 +97,8 @@ fn run() -> error::Result<()> { .value_name("json|protobuf") .help("Determine output format (default: json)"), ) + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg( Arg::with_name("inputs") @@ -149,8 +151,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser") - .arg(Arg::with_name("quiet").long("quiet").short("q").help("open in default browser")), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -268,7 +276,13 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.values_of("inputs").unwrap())?; - tags::generate_tags(&loader, matches.value_of("scope"), &paths)?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + 
&paths, + matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 515f4c5264..5ea00f39d1 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -3,10 +3,17 @@ use super::util; use crate::error::{Error, Result}; use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use std::{fs, str}; use tree_sitter_tags::TagsContext; -pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> { +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { let mut lang = None; if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; @@ -34,36 +41,50 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? { - let ident = if paths.len() > 1 { - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; - "\t" + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" } else { - "" + indent = ""; }; let source = fs::read(path)?; + let t0 = Instant::now(); for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? 
{ let tag = tag?; - write!( - &mut stdout, - "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", - ident, - str::from_utf8(&source[tag.name_range]).unwrap_or(""), - &tags_config.syntax_type_name(tag.syntax_type_id), - if tag.is_definition { "def" } else { "ref" }, - tag.span.start, - tag.span.end, - str::from_utf8(&source[tag.line_range]).unwrap_or(""), - )?; - if let Some(docs) = tag.docs { - if docs.len() > 120 { - write!(&mut stdout, "\t{:?}...", &docs[0..120])?; - } else { - write!(&mut stdout, "\t{:?}", &docs)?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", &docs[0..120])?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } } + writeln!(&mut stdout, "")?; } - writeln!(&mut stdout, "")?; + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; } } else { eprintln!("No tags config found for path {:?}", path); From 1ecfc2548f1dfe0aa2ec34fb174555a27f37dde0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 11:30:30 -0700 Subject: [PATCH 101/282] tags: Move impls below type definitions --- tags/src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 7d58d99bdd..790b866a75 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -63,15 +63,6 @@ pub enum Error { InvalidCapture(String), } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. 
Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), - _ => write!(f, "{:?}", self) - } - } -} - #[derive(Debug, Default)] struct PatternInfo { docs_adjacent_capture: Option, @@ -475,6 +466,15 @@ where } } +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::InvalidCapture(name) => write!(f, "Invalid capture @{}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).", name), + _ => write!(f, "{:?}", self) + } + } +} + impl From for Error { fn from(error: regex::Error) -> Self { Error::Regex(error) From 52360b103d0b293c54e83a188d7f2f1b9a7dc5d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:07:57 -0700 Subject: [PATCH 102/282] tags: Fix comment position --- tags/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 790b866a75..41b4557a0c 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -423,8 +423,6 @@ where } } - // Only create one tag per node. The tag queue is sorted by node position - // to allow for fast lookup. let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); let utf16_column_range = @@ -441,6 +439,9 @@ where is_definition, syntax_type_id, }; + + // Only create one tag per node. The tag queue is sorted by node position + // to allow for fast lookup. 
match self.tag_queue.binary_search_by_key( &(tag.name_range.end, tag.name_range.start), |(tag, _)| (tag.name_range.end, tag.name_range.start), From 0f805603104cab4d59c9f02154720fd000b22305 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:13:12 -0700 Subject: [PATCH 103/282] tags: Reuse work when computing utf16 columns, line ranges --- tags/src/lib.rs | 64 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 41b4557a0c..ca5699ca4e 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -91,6 +91,7 @@ where matches: I, _tree: Tree, source: &'a [u8], + prev_line_info: Option, config: &'a TagsConfiguration, cancellation_flag: Option<&'a AtomicUsize>, iter_count: usize, @@ -98,6 +99,13 @@ where scopes: Vec>, } +struct LineInfo { + utf8_position: Point, + utf8_byte: usize, + utf16_column: usize, + line_range: Range, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -260,6 +268,7 @@ impl TagsContext { source, config, cancellation_flag, + prev_line_info: None, tag_queue: Vec::new(), iter_count: 0, scopes: vec![LocalScope { @@ -425,10 +434,46 @@ where let range = tag_node.byte_range(); let span = name_node.start_position()..name_node.end_position(); - let utf16_column_range = - get_utf16_column_range(self.source, &name_range, &span); - let line_range = - line_range(self.source, name_range.start, span.start, MAX_LINE_LEN); + + // Compute tag properties that depend on the text of the containing line. If the + // previous tag occurred on the same line, then reuse results from the previous tag. 
+ let line_range; + let mut prev_utf16_column = 0; + let mut prev_utf8_byte = name_range.start - span.start.column; + let line_info = self.prev_line_info.as_ref().and_then(|info| { + if info.utf8_position.row == span.start.row { + Some(info) + } else { + None + } + }); + if let Some(line_info) = line_info { + line_range = line_info.line_range.clone(); + if line_info.utf8_position.column <= span.start.column { + prev_utf8_byte = line_info.utf8_byte; + prev_utf16_column = line_info.utf16_column; + } + } else { + line_range = self::line_range( + self.source, + name_range.start, + span.start, + MAX_LINE_LEN, + ); + } + + let utf16_start_column = prev_utf16_column + + utf16_len(&self.source[prev_utf8_byte..name_range.start]); + let utf16_end_column = + utf16_start_column + utf16_len(&self.source[name_range.clone()]); + let utf16_column_range = utf16_start_column..utf16_end_column; + + self.prev_line_info = Some(LineInfo { + utf8_position: span.end, + utf8_byte: name_range.end, + utf16_column: utf16_end_column, + line_range: line_range.clone(), + }); let tag = Tag { line_range, span, @@ -570,17 +615,6 @@ fn line_range( line_start_byte..line_end_byte } -fn get_utf16_column_range( - text: &[u8], - byte_range: &Range, - point_range: &Range, -) -> Range { - let line_start_byte = byte_range.start - point_range.start.column; - let preceding_text_on_line = &text[line_start_byte..byte_range.start]; - let start_col = utf16_len(preceding_text_on_line); - start_col..(start_col + utf16_len(&text[byte_range.clone()])) -} - fn utf16_len(bytes: &[u8]) -> usize { LossyUtf8::new(bytes) .flat_map(|chunk| chunk.chars().map(char::len_utf16)) From 9e38fd9f5c32b58919c1cb422f06c8021da98207 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 12:32:40 -0700 Subject: [PATCH 104/282] Add todo comment for LossyUtf8 iterator --- tags/src/lib.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 
ca5699ca4e..dcbb9984d0 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -106,6 +106,11 @@ struct LineInfo { line_range: Range, } +struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -533,13 +538,11 @@ impl From for Error { } } -pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - +// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` +// is ever stabilized, we should use that. Otherwise, this struct could be moved +// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`. impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { + fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, From 6cee04350f909c6611258ccaee06446e08218f0c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 9 Jul 2020 13:39:47 -0700 Subject: [PATCH 105/282] tags: Expose utf16 column range to C API --- tags/include/tree_sitter/tags.h | 2 ++ tags/src/c_lib.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 58f5bbd9a1..f2b17075d4 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -28,6 +28,8 @@ typedef struct { uint32_t line_end_byte; TSPoint start_point; TSPoint end_point; + uint32_t utf16_start_column; + uint32_t utf16_end_column; uint32_t docs_start_byte; uint32_t docs_end_byte; uint32_t syntax_type_id; diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 77f8aae5c0..07e1e19ae3 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -36,6 +36,8 @@ pub struct TSTag { pub line_end_byte: u32, pub start_point: TSPoint, pub end_point: TSPoint, + pub utf16_start_colum: u32, + pub utf16_end_colum: u32, pub docs_start_byte: u32, pub docs_end_byte: 
u32, pub syntax_type_id: u32, @@ -161,6 +163,8 @@ pub extern "C" fn ts_tagger_tag( row: tag.span.end.row as u32, column: tag.span.end.column as u32, }, + utf16_start_colum: tag.utf16_column_range.start as u32, + utf16_end_colum: tag.utf16_column_range.end as u32, docs_start_byte: prev_docs_len as u32, docs_end_byte: buffer.docs.len() as u32, syntax_type_id: tag.syntax_type_id, @@ -225,7 +229,7 @@ pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( *len = 0; if let Some(config) = tagger.languages.get(scope_name) { *len = config.c_syntax_type_names.len() as u32; - return config.c_syntax_type_names.as_ptr() as *const *const i8 + return config.c_syntax_type_names.as_ptr() as *const *const i8; } std::ptr::null() } From 0bfd47e2e5631af43ddf30abdac2043051bbe8af Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 10 Jul 2020 10:12:46 -0700 Subject: [PATCH 106/282] Improve error message when failing to run graphviz stuff Fixes #682 --- cli/src/util.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cli/src/util.rs b/cli/src/util.rs index 8978ecc16c..9f941f62ae 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,3 +1,4 @@ +use super::error::{Error, Result}; use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -31,12 +32,12 @@ pub struct LogSession(); pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { Ok(LogSession()) } #[cfg(unix)] -pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; @@ -46,11 +47,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result Date: Fri, 10 Jul 2020 13:33:04 -0700 Subject: [PATCH 107/282] highlight: Avoid accidentally treating locals patterns as highlight 
patterns --- highlight/src/lib.rs | 163 ++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 88 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index d2e27b46df..bb11021966 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -620,7 +620,7 @@ where type Item = Result; fn next(&mut self) -> Option { - loop { + 'main: loop { // If we've already determined the next highlight boundary, just return it. if let Some(e) = self.next_event.take() { return Some(Ok(e)); @@ -640,29 +640,34 @@ where // If none of the layers have any more highlight boundaries, terminate. if self.layers.is_empty() { - if self.byte_offset < self.source.len() { + return if self.byte_offset < self.source.len() { let result = Some(Ok(HighlightEvent::Source { start: self.byte_offset, end: self.source.len(), })); self.byte_offset = self.source.len(); - return result; + result } else { - return None; - } + None + }; } // Get the next capture from whichever layer has the earliest highlight boundary. - let match_; - let mut captures; - let mut capture; - let mut pattern_index; + let range; let layer = &mut self.layers[0]; - if let Some((m, capture_index)) = layer.captures.peek() { - match_ = m; - captures = match_.captures; - pattern_index = match_.pattern_index; - capture = captures[*capture_index]; + if let Some((next_match, capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. 
+ if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } } // If there are no more captures, then emit any remaining highlight end events. // And if there are none of those, then just advance to the end of the document. @@ -673,30 +678,17 @@ where return self.emit_event(self.source.len(), None); }; - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - let range = capture.node.byte_range(); - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } + let (mut match_, capture_index) = layer.captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; // If this capture represents an injection, then process the injection. - if pattern_index < layer.config.locals_pattern_index { + if match_.pattern_index < layer.config.locals_pattern_index { let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, match_, &self.source); + injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. The `unwrap` is ok because - layer.captures.next().unwrap().0.remove(); + // in the stream of captures. + match_.remove(); // If a language is found with the given name, then add a new language layer // to the highlighted document. 
@@ -729,16 +721,19 @@ where } self.sort_layers(); - continue; + continue 'main; } - layer.captures.next(); + // Remove from the local scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } // If this capture is for tracking local variables, then process the // local variable info. let mut reference_highlight = None; let mut definition_highlight = None; - while pattern_index < layer.config.highlights_pattern_index { + while match_.pattern_index < layer.config.highlights_pattern_index { // If the node represents a local scope, push a new local scope onto // the scope stack. if Some(capture.index) == layer.config.local_scope_capture_index { @@ -748,7 +743,7 @@ where range: range.clone(), local_defs: Vec::new(), }; - for prop in layer.config.query.property_settings(pattern_index) { + for prop in layer.config.query.property_settings(match_.pattern_index) { match prop.key.as_ref() { "local.scope-inherits" => { scope.inherits = @@ -767,7 +762,7 @@ where let scope = layer.scope_stack.last_mut().unwrap(); let mut value_range = 0..0; - for capture in captures { + for capture in match_.captures { if Some(capture.index) == layer.config.local_def_value_capture_index { value_range = capture.node.byte_range(); } @@ -810,84 +805,76 @@ where } } - // Continue processing any additional local-variable-tracking patterns - // for the same node. + // Continue processing any additional matches for the same node. if let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { - pattern_index = next_match.pattern_index; - captures = next_match.captures; capture = next_capture; - layer.captures.next(); + match_ = layer.captures.next().unwrap().0; continue; - } else { - break; } } - break; + self.sort_layers(); + continue 'main; } // Otherwise, this capture must represent a highlight. 
- let mut has_highlight = true; - // If this exact range has already been highlighted by an earlier pattern, or by // a different layer, then skip over this one. if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { if range.start == last_start && range.end == last_end && layer.depth < last_depth { - has_highlight = false; + self.sort_layers(); + continue 'main; } } // If the current node was found to be a local variable, then skip over any // highlighting patterns that are disabled for local variables. - while has_highlight - && (definition_highlight.is_some() || reference_highlight.is_some()) - && layer.config.non_local_variable_patterns[pattern_index] - { - has_highlight = false; - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - has_highlight = true; - pattern_index = next_match.pattern_index; - layer.captures.next(); - continue; + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } } + + self.sort_layers(); + continue 'main; } - break; } - if has_highlight { - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. 
- while let Some((next_match, next_capture_index)) = layer.captures.peek() { - if next_match.captures[*next_capture_index].node == capture.node { - layer.captures.next(); - } else { - break; - } + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + layer.captures.next(); + } else { + break; } + } - let current_highlight = layer.config.highlight_indices[capture.index as usize]; + let current_highlight = layer.config.highlight_indices[capture.index as usize]; - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } + // Emit a scope start event and push the node's end position to the stack. 
+ if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); } self.sort_layers(); From e4e785b567eb975c5fa6900b08728aac856bdaad Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 10 Jul 2020 13:47:56 -0700 Subject: [PATCH 108/282] Remove unused flags from tags CLI command --- cli/src/main.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index d7a5e7b1f1..713bf28fae 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -90,13 +90,6 @@ fn run() -> error::Result<()> { ) .subcommand( SubCommand::with_name("tags") - .arg( - Arg::with_name("format") - .short("f") - .long("format") - .value_name("json|protobuf") - .help("Determine output format (default: json)"), - ) .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) @@ -106,12 +99,6 @@ fn run() -> error::Result<()> { .index(1) .required(true) .multiple(true), - ) - .arg( - Arg::with_name("v") - .short("v") - .multiple(true) - .help("Sets the level of verbosity"), ), ) .subcommand( From c2fb0f5229b1bb72005da5177457fafb1560954a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=E1=BA=A5n-Anh=20Nguy=E1=BB=85n?= Date: Sun, 12 Jul 2020 20:45:17 +0700 Subject: [PATCH 109/282] cli: Add --byte-range flag to query command --- cli/src/main.rs | 12 +++++++++++- cli/src/query.rs | 4 ++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 713bf28fae..a543202d94 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -85,6 +85,12 @@ fn run() -> error::Result<()> { .multiple(true) .required(true), ) + .arg( + Arg::with_name("beg>: error::Result<()> { matches.value_of("scope"), )?; let query_path = 
Path::new(matches.value_of("query-path").unwrap()); - query::query_files_at_paths(language, paths, query_path, ordered_captures)?; + let range = matches.value_of("beg>: = br.split(":").collect(); + (r[0].parse().unwrap(), r[1].parse().unwrap()) + }); + query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.values_of("inputs").unwrap())?; diff --git a/cli/src/query.rs b/cli/src/query.rs index 4724227336..8d097911c3 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -9,6 +9,7 @@ pub fn query_files_at_paths( paths: Vec<&Path>, query_path: &Path, ordered_captures: bool, + range: Option<(usize, usize)>, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ -20,6 +21,9 @@ pub fn query_files_at_paths( .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; let mut query_cursor = QueryCursor::new(); + if let Some((beg, end)) = range { + query_cursor.set_byte_range(beg, end); + } let mut parser = Parser::new(); parser.set_language(language).map_err(|e| e.to_string())?; From 91a715799e1b468c8303c7c612416c04f5a9c5fb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 14 Jul 2020 15:04:39 -0700 Subject: [PATCH 110/282] Accept a paths file to most CLI subcommands --- cli/src/main.rs | 111 +++++++++++++++++++++++++++-------------------- cli/src/query.rs | 6 +-- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index a543202d94..0668d08dc3 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -53,11 +53,12 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("parse") .about("Parse files") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) 
.arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) @@ -79,17 +80,18 @@ fn run() -> error::Result<()> { SubCommand::with_name("query") .about("Search files using a syntax tree query") .arg(Arg::with_name("query-path").index(1).required(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(2) .multiple(true) - .required(true), + .required(false), ) .arg( - Arg::with_name("beg>: error::Result<()> { .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("time").long("quiet").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("inputs") + Arg::with_name("paths") .help("The source file to use") .index(1) - .required(true) .multiple(true), ), ) @@ -122,11 +124,12 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("highlight") .about("Highlight a file") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) @@ -225,7 +228,9 @@ fn run() -> error::Result<()> { let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - let paths = collect_paths(matches.values_of("path").unwrap())?; + + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; loader.find_all_languages(&config.parser_directories)?; @@ -251,28 +256,23 @@ fn run() -> error::Result<()> { } } else if let Some(matches) = matches.subcommand_matches("query") { let ordered_captures = 
matches.values_of("captures").is_some(); - let paths = matches - .values_of("path") - .unwrap() - .into_iter() - .map(Path::new) - .collect::>(); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; loader.find_all_languages(&config.parser_directories)?; let language = select_language( &mut loader, - paths[0], + Path::new(&paths[0]), ¤t_dir, matches.value_of("scope"), )?; let query_path = Path::new(matches.value_of("query-path").unwrap()); - let range = matches.value_of("beg>: = br.split(":").collect(); (r[0].parse().unwrap(), r[1].parse().unwrap()) }); query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; - let paths = collect_paths(matches.values_of("inputs").unwrap())?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; tags::generate_tags( &loader, matches.value_of("scope"), @@ -285,7 +285,7 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; let time = matches.is_present("time"); - let paths = collect_paths(matches.values_of("path").unwrap())?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; let html_mode = matches.is_present("html"); if html_mode { println!("{}", highlight::HTML_HEADER); @@ -358,39 +358,58 @@ fn run() -> error::Result<()> { Ok(()) } -fn collect_paths<'a>(paths: impl Iterator) -> error::Result> { - let mut result = Vec::new(); +fn collect_paths<'a>( + paths_file: Option<&str>, + paths: Option>, +) -> error::Result> { + if let Some(paths_file) = paths_file { + return Ok(fs::read_to_string(paths_file) + .map_err(Error::wrap(|| { + format!("Failed to read paths file {}", paths_file) + }))? 
+ .trim() + .split_ascii_whitespace() + .map(String::from) + .collect::>()); + } + + if let Some(paths) = paths { + let mut result = Vec::new(); - let mut incorporate_path = |path: &str, positive| { - if positive { - result.push(path.to_string()); - } else { - if let Some(index) = result.iter().position(|p| p == path) { - result.remove(index); + let mut incorporate_path = |path: &str, positive| { + if positive { + result.push(path.to_string()); + } else { + if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); + } } - } - }; + }; - for mut path in paths { - let mut positive = true; - if path.starts_with("!") { - positive = false; - path = path.trim_start_matches("!"); - } + for mut path in paths { + let mut positive = true; + if path.starts_with("!") { + positive = false; + path = path.trim_start_matches("!"); + } - if Path::new(path).exists() { - incorporate_path(path, positive); - } else { - let paths = - glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; - for path in paths { - if let Some(path) = path?.to_str() { - incorporate_path(path, positive); + if Path::new(path).exists() { + incorporate_path(path, positive); + } else { + let paths = glob(path) + .map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; + for path in paths { + if let Some(path) = path?.to_str() { + incorporate_path(path, positive); + } } } } + + return Ok(result); } - Ok(result) + + Err(Error::new("Must provide one or more paths".to_string())) } fn select_language( diff --git a/cli/src/query.rs b/cli/src/query.rs index 8d097911c3..e71e62540c 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -6,7 +6,7 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, - paths: Vec<&Path>, + paths: Vec, query_path: &Path, ordered_captures: bool, range: Option<(usize, usize)>, @@ -29,9 +29,9 @@ pub fn query_files_at_paths( parser.set_language(language).map_err(|e| 
e.to_string())?; for path in paths { - writeln!(&mut stdout, "{}", path.to_str().unwrap())?; + writeln!(&mut stdout, "{}", path)?; - let source_code = fs::read(path).map_err(Error::wrap(|| { + let source_code = fs::read(&path).map_err(Error::wrap(|| { format!("Error reading source file {:?}", path) }))?; let text_callback = |n: Node| &source_code[n.byte_range()]; From 4535efce69016d28360618f9fc13e4ad4401b545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 09:39:06 -0700 Subject: [PATCH 111/282] query: Prevent dropping of matches when exceeding range maximum Fixes #685 --- cli/src/tests/query_test.rs | 39 +++++++++++++++++++++++++++++++++ lib/src/query.c | 43 ++++++++++++++++++++++++++----------- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index d4f18c7ddb..06ecc42ea5 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1189,6 +1189,45 @@ fn test_query_matches_within_byte_range() { }); } +#[test] +fn test_query_captures_within_byte_range() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + " + (call_expression + function: (identifier) @function + arguments: (argument_list (string_literal) @string.arg)) + + (string_literal) @string + ", + ) + .unwrap(); + + let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(3, 27) + .captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("function", "DEFUN"), + ("string.arg", "\"safe-length\""), + ("string", "\"safe-length\""), + ] + ); + }); +} + #[test] fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { diff --git 
a/lib/src/query.c b/lib/src/query.c index ff243494a2..b95ba0574d 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -172,6 +172,7 @@ struct TSQueryCursor { TSPoint start_point; TSPoint end_point; bool ascending; + bool halted; }; static const TSQueryError PARENT_DONE = -1; @@ -1286,6 +1287,7 @@ TSQueryCursor *ts_query_cursor_new(void) { TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); *self = (TSQueryCursor) { .ascending = false, + .halted = false, .states = array_new(), .finished_states = array_new(), .capture_list_pool = capture_list_pool_new(), @@ -1319,6 +1321,7 @@ void ts_query_cursor_exec( self->next_state_id = 0; self->depth = 0; self->ascending = false; + self->halted = false; self->query = query; } @@ -1522,18 +1525,30 @@ static QueryState *ts_query__cursor_copy_state( // `finished_states` array. Multiple patterns can finish on the same node. If // there are no more matches, return `false`. static inline bool ts_query_cursor__advance(TSQueryCursor *self) { - do { + bool did_match = false; + for (;;) { + if (self->halted) { + while (self->states.size > 0) { + QueryState state = array_pop(&self->states); + capture_list_pool_release( + &self->capture_list_pool, + state.capture_list_id + ); + } + } + + if (did_match || self->halted) return did_match; + if (self->ascending) { LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); // Leave this node by stepping to its next sibling or to its parent. - bool did_move = true; if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { self->ascending = false; } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { - did_move = false; + self->halted = true; } // After leaving a node, remove any states that cannot make further progress. 
@@ -1545,10 +1560,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If a state completed its pattern inside of this node, but was deferred from finishing // in order to search for longer matches, mark it as finished. if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth > self->depth || !did_move) { + if (state->start_depth > self->depth || self->halted) { LOG(" finish pattern %u\n", state->pattern_index); state->id = self->next_state_id++; array_push(&self->finished_states, *state); + did_match = true; deleted_count++; continue; } @@ -1575,10 +1591,6 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } } self->states.size -= deleted_count; - - if (!did_move) { - return self->finished_states.size > 0; - } } else { // If this node is before the selected range, then avoid descending into it. TSNode node = ts_tree_cursor_current_node(&self->cursor); @@ -1596,7 +1608,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if ( self->end_byte <= ts_node_start_byte(node) || point_lte(self->end_point, ts_node_start_point(node)) - ) return false; + ) { + self->halted = true; + continue; + } // Get the properties of the current node. TSSymbol symbol = ts_node_symbol(node); @@ -1888,6 +1903,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->id = self->next_state_id++; array_push(&self->finished_states, *state); array_erase(&self->states, state - self->states.contents); + did_match = true; i--; } } @@ -1901,9 +1917,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { self->ascending = true; } } - } while (self->finished_states.size == 0); - - return true; + } } bool ts_query_cursor_next_match( @@ -2043,7 +2057,10 @@ bool ts_query_cursor_next_capture( // If there are no finished matches that are ready to be returned, then // continue finding more matches. 
- if (!ts_query_cursor__advance(self)) return false; + if ( + !ts_query_cursor__advance(self) && + self->finished_states.size == 0 + ) return false; } } From f4adf0269af810e410c40a663c561511fb8c0467 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 09:53:01 -0700 Subject: [PATCH 112/282] Propagate dynamic precedence correctly for inlined rules Fixes #683 --- cli/src/generate/prepare_grammar/process_inlines.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 9ef89d75c4..f83658b2f4 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder { last_inserted_step.associativity = removed_step.associativity; } } + if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { + production.dynamic_precedence = p.dynamic_precedence; + } production }), ); @@ -226,7 +229,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], @@ -258,7 +261,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), From c4fca5f73e194988dbb2790aa37f93fffaa284f5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 14:19:59 -0700 Subject: [PATCH 113/282] node types: Fix handling of repetitions inside of fields Fixes #676 --- cli/src/generate/node_types.rs | 233 +++++++++++++++++++++------------ 1 file changed, 146 insertions(+), 87 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 9c3bea6477..6df408074b 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -19,7 +19,7 @@ pub(crate) struct FieldInfo { #[derive(Clone, 
Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: Vec, + pub children: FieldInfo, pub children_without_fields: FieldInfo, pub has_multi_step_production: bool, } @@ -70,7 +70,7 @@ impl Default for FieldInfoJSON { impl Default for ChildQuantity { fn default() -> Self { - Self::zero() + Self::one() } } @@ -158,7 +158,7 @@ pub(crate) fn get_variable_info( // Each variable's summary can depend on the summaries of other hidden variables, // and variables can have mutually recursive structure. So we compute the summaries - // iteratively, in a loop that terminates only when more changes are possible. + // iteratively, in a loop that terminates only when no more changes are possible. let mut did_change = true; let mut all_initialized = false; let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; @@ -168,13 +168,14 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut variable_info = result[i].clone(); - // Within a variable, consider each production separately. For each - // production, determine which children and fields can occur, and how many - // times they can occur. - for (production_index, production) in variable.productions.iter().enumerate() { - let mut field_quantities = HashMap::new(); - let mut children_without_fields_quantity = ChildQuantity::zero(); - let mut has_uninitialized_invisible_children = false; + // Examine each of the variable's productions. The variable's child types can be + // immediately combined across all productions, but the child quantities must be + // recorded separately for each production. 
+ for production in &variable.productions { + let mut production_field_quantities = HashMap::new(); + let mut production_children_quantity = ChildQuantity::zero(); + let mut production_children_without_fields_quantity = ChildQuantity::zero(); + let mut production_has_uninitialized_invisible_children = false; if production.steps.len() > 1 { variable_info.has_multi_step_production = true; @@ -190,106 +191,92 @@ pub(crate) fn get_variable_info( ChildType::Normal(child_symbol) }; - // Record all of the types of direct children. - did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type); + let child_is_hidden = !child_type_is_visible(&child_type) + && !syntax_grammar.supertype_symbols.contains(&child_symbol); - // Record all of the field names that occur. + // Maintain the set of all child types for this variable, and the quantity of + // visible children in this production. + did_change |= sorted_vec_insert(&mut variable_info.children.types, &child_type); + if !child_is_hidden { + production_children_quantity.append(ChildQuantity::one()); + } + + // Maintain the set of child types associated with each field, and the quantity + // of children associated with each field in this production. if let Some(field_name) = &step.field_name { - // Record how many times each field occurs in this production. - field_quantities - .entry(field_name) - .or_insert(ChildQuantity::zero()) - .append(ChildQuantity::one()); - - // Record the types of children for this field. - let field_info = - variable_info.fields.entry(field_name.clone()).or_insert({ - let mut info = FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }; - - // If this field did *not* occur in an earlier production, - // then it is not required. 
- if production_index > 0 { - info.quantity.required = false; - } - info - }); + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()); did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } - // Record named children without fields. - else if child_type_is_named(&child_type) { - // Record how many named children without fields occur in this production. - children_without_fields_quantity.append(ChildQuantity::one()); - // Record the types of all of the named children without fields. - let children_info = &mut variable_info.children_without_fields; - if children_info.types.is_empty() { - children_info.quantity = ChildQuantity::one(); + let production_field_quantity = production_field_quantities + .entry(field_name) + .or_insert(ChildQuantity::zero()); + + // Inherit the types and quantities of hidden children associated with fields. + if child_is_hidden { + let child_variable_info = &result[child_symbol.index]; + for child_type in &child_variable_info.children.types { + did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + } + production_field_quantity.append(child_variable_info.children.quantity); + } else { + production_field_quantity.append(ChildQuantity::one()); } - did_change |= sorted_vec_insert(&mut children_info.types, &child_type); + } + // Maintain the set of named children without fields within this variable. + else if child_type_is_named(&child_type) { + production_children_without_fields_quantity.append(ChildQuantity::one()); + did_change |= sorted_vec_insert( + &mut variable_info.children_without_fields.types, + &child_type, + ); } - // Inherit information from any hidden children. - if child_symbol.is_non_terminal() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - && step.alias.is_none() - && !child_type_is_visible(&child_type) - { + // Inherit all child information from hidden children. 
+ if child_is_hidden && child_symbol.is_non_terminal() { let child_variable_info = &result[child_symbol.index]; - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. + // If a hidden child can have multiple children, then its parent node can + // appear to have multiple children. if child_variable_info.has_multi_step_production { variable_info.has_multi_step_production = true; } - // Inherit fields from this hidden child + // If a hidden child has fields, then the parent node can appear to have + // those same fields. for (field_name, child_field_info) in &child_variable_info.fields { - field_quantities + production_field_quantities .entry(field_name) .or_insert(ChildQuantity::zero()) .append(child_field_info.quantity); let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert(FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }); + .or_insert(FieldInfo::default()); for child_type in &child_field_info.types { - sorted_vec_insert(&mut field_info.types, &child_type); + did_change |= sorted_vec_insert(&mut field_info.types, &child_type); } } - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { + // If a hidden child has children, then the parent node can appear to have + // those same children. + production_children_quantity.append(child_variable_info.children.quantity); + for child_type in &child_variable_info.children.types { did_change |= - sorted_vec_insert(&mut variable_info.child_types, child_type); + sorted_vec_insert(&mut variable_info.children.types, child_type); } - // If any field points to this hidden child, inherit child types - // for the field. 
- if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } - } - // Inherit info about children without fields from this hidden child. - else { + // If a hidden child can have named children without fields, then the parent + // node can appear to have those same children. + if step.field_name.is_none() { let grandchildren_info = &child_variable_info.children_without_fields; if !grandchildren_info.types.is_empty() { - children_without_fields_quantity - .append(grandchildren_info.quantity); - - if variable_info.children_without_fields.types.is_empty() { - variable_info.children_without_fields.quantity = - ChildQuantity::one(); - } - - for child_type in &grandchildren_info.types { + production_children_without_fields_quantity + .append(child_variable_info.children_without_fields.quantity); + for child_type in &child_variable_info.children_without_fields.types + { did_change |= sorted_vec_insert( &mut variable_info.children_without_fields.types, &child_type, @@ -302,22 +289,27 @@ pub(crate) fn get_variable_info( // Note whether or not this production contains children whose summaries // have not yet been computed. if child_symbol.index >= i && !all_initialized { - has_uninitialized_invisible_children = true; + production_has_uninitialized_invisible_children = true; } } // If this production's children all have had their summaries initialized, // then expand the quantity information with all of the possibilities introduced // by this production. 
- if !has_uninitialized_invisible_children { + if !production_has_uninitialized_invisible_children { + did_change |= variable_info + .children + .quantity + .union(production_children_quantity); + did_change |= variable_info .children_without_fields .quantity - .union(children_without_fields_quantity); + .union(production_children_without_fields_quantity); for (field_name, info) in variable_info.fields.iter_mut() { did_change |= info.quantity.union( - field_quantities + production_field_quantities .get(field_name) .cloned() .unwrap_or(ChildQuantity::zero()), @@ -352,7 +344,8 @@ pub(crate) fn get_variable_info( // Update all of the node type lists to eliminate hidden nodes. for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] - .child_types + .children + .types .retain(child_type_is_visible); } for variable_info in result.iter_mut() { @@ -467,7 +460,8 @@ pub(crate) fn generate_node_types_json( subtypes: None, }); let mut subtypes = info - .child_types + .children + .types .iter() .map(child_type_to_node_type) .collect::>(); @@ -1461,6 +1455,71 @@ mod tests { ); } + #[test] + fn test_get_variable_info_with_repetitions_inside_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Field associated with a repetition. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1")], + }, + Production { + dynamic_precedence: 0, + steps: vec![], + }, + ], + }, + // Repetition node + SyntaxVariable { + name: "_rule0_repeat".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(1)), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + &AliasMap::new(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( From 12341dbbc03075e0b3bdcbf05191efbac78731fe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 14:23:54 -0700 Subject: [PATCH 114/282] 0.16.9 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cdad3b6167..117ac49e6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.8" +version = "0.16.9" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 0d85952f5f..52a2ed6b9f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = 
"tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.8" +version = "0.16.9" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 738c5622d6..01afe1075f 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.8", + "version": "0.16.9", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 82aa1462fd9f4b0d3a27dc2241318d6dbd0f6830 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 17 Jul 2020 15:12:13 -0700 Subject: [PATCH 115/282] Clean up get_variable_info function --- cli/src/generate/node_types.rs | 67 ++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 6df408074b..039d7190c9 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -196,7 +196,8 @@ pub(crate) fn get_variable_info( // Maintain the set of all child types for this variable, and the quantity of // visible children in this production. - did_change |= sorted_vec_insert(&mut variable_info.children.types, &child_type); + did_change |= + extend_sorted(&mut variable_info.children.types, Some(&child_type)); if !child_is_hidden { production_children_quantity.append(ChildQuantity::one()); } @@ -208,7 +209,7 @@ pub(crate) fn get_variable_info( .fields .entry(field_name.clone()) .or_insert(FieldInfo::default()); - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); let production_field_quantity = production_field_quantities .entry(field_name) @@ -217,9 +218,10 @@ pub(crate) fn get_variable_info( // Inherit the types and quantities of hidden children associated with fields. 
if child_is_hidden { let child_variable_info = &result[child_symbol.index]; - for child_type in &child_variable_info.children.types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } + did_change |= extend_sorted( + &mut field_info.types, + &child_variable_info.children.types, + ); production_field_quantity.append(child_variable_info.children.quantity); } else { production_field_quantity.append(ChildQuantity::one()); @@ -228,9 +230,9 @@ pub(crate) fn get_variable_info( // Maintain the set of named children without fields within this variable. else if child_type_is_named(&child_type) { production_children_without_fields_quantity.append(ChildQuantity::one()); - did_change |= sorted_vec_insert( + did_change |= extend_sorted( &mut variable_info.children_without_fields.types, - &child_type, + Some(&child_type), ); } @@ -251,22 +253,23 @@ pub(crate) fn get_variable_info( .entry(field_name) .or_insert(ChildQuantity::zero()) .append(child_field_info.quantity); - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert(FieldInfo::default()); - for child_type in &child_field_info.types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } + did_change |= extend_sorted( + &mut variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()) + .types, + &child_field_info.types, + ); } // If a hidden child has children, then the parent node can appear to have // those same children. production_children_quantity.append(child_variable_info.children.quantity); - for child_type in &child_variable_info.children.types { - did_change |= - sorted_vec_insert(&mut variable_info.children.types, child_type); - } + did_change |= extend_sorted( + &mut variable_info.children.types, + &child_variable_info.children.types, + ); // If a hidden child can have named children without fields, then the parent // node can appear to have those same children. 
@@ -275,13 +278,10 @@ pub(crate) fn get_variable_info( if !grandchildren_info.types.is_empty() { production_children_without_fields_quantity .append(child_variable_info.children_without_fields.quantity); - for child_type in &child_variable_info.children_without_fields.types - { - did_change |= sorted_vec_insert( - &mut variable_info.children_without_fields.types, - &child_type, - ); - } + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + &child_variable_info.children_without_fields.types, + ); } } } @@ -680,16 +680,19 @@ fn variable_type_for_child_type( } } -fn sorted_vec_insert(vec: &mut Vec, value: &T) -> bool +fn extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool where T: Clone + Eq + Ord, + T: 'a, { - if let Err(i) = vec.binary_search(&value) { - vec.insert(i, value.clone()); - true - } else { - false - } + values.into_iter().any(|value| { + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } + }) } #[cfg(test)] From 740d864e678ab0c5518780afd906e2123d8a9d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=E1=BA=A5n-Anh=20Nguy=E1=BB=85n?= Date: Sun, 19 Jul 2020 12:40:17 +0700 Subject: [PATCH 116/282] Add '.' 
as a valid start of a predicate, in addition to '#' See https://github.com/ubolonton/emacs-tree-sitter/issues/38 --- cli/src/tests/query_test.rs | 29 +++++++++++++++++++++++++++++ lib/src/query.c | 4 ++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 06ecc42ea5..493bea8a45 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2087,6 +2087,35 @@ fn test_query_disable_pattern() { }); } +#[test] +fn test_query_alternative_predicate_prefix() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new(language, r#" + ((call_expression + function: (identifier) @keyword + arguments: (argument_list + (string_literal) @function)) + (.eq? @keyword "DEFUN")) + "#).unwrap(); + let source = r#" + DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, + doc: /* Return the argument unchanged. */ + attributes: const) + (Lisp_Object arg) + { + return arg; + } + "#; + assert_query_matches( + language, + &query, + source, + &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], + ); + }); +} + fn assert_query_matches( language: Language, query: &Query, diff --git a/lib/src/query.c b/lib/src/query.c index b95ba0574d..acce2c72fc 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -805,8 +805,8 @@ static TSQueryError ts_query__parse_pattern( } } - // A pound character indicates the start of a predicate. - else if (stream->next == '#') { + // A dot/pound character indicates the start of a predicate. + else if (stream->next == '.' 
|| stream->next == '#') { stream_advance(stream); return ts_query__parse_predicate(self, stream); } From ba70927f573b0d098046da77888d3219ee31cc9d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 20 Jul 2020 16:46:45 -0700 Subject: [PATCH 117/282] tags: Skip tags with a parse error inside the name --- tags/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dcbb9984d0..7733f3e3b8 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -372,6 +372,10 @@ where } if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { + if name_node.has_error() { + continue; + } + let name_range = name_node.byte_range(); if pattern_info.name_must_be_non_local { From a3b440b0c89763bb0b2e49f2a94144accc13462b Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Thu, 23 Jul 2020 09:48:18 +0200 Subject: [PATCH 118/282] size_t variables need %zu, not %lu --- lib/src/alloc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 9bbf751335..0e0927a928 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -45,7 +45,7 @@ static inline bool ts_toggle_allocation_recording(bool value) { static inline void *ts_malloc(size_t size) { void *result = malloc(size); if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %lu bytes", size); + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); exit(1); } return result; @@ -54,7 +54,7 @@ static inline void *ts_malloc(size_t size) { static inline void *ts_calloc(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %lu bytes", count * size); + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); exit(1); } return result; @@ -63,7 +63,7 @@ static inline void *ts_calloc(size_t count, size_t size) { static inline void *ts_realloc(void *buffer, size_t size) { void *result = realloc(buffer, 
size); if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to reallocate %lu bytes", size); + fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); exit(1); } return result; From de2b71d465919cc361d45a4abecb867b12fdd6d4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 23 Jul 2020 16:05:50 -0700 Subject: [PATCH 119/282] Fix query bug when max permutations are exceeded --- cli/src/loader.rs | 4 +- cli/src/tests/query_test.rs | 45 +++++++++++- lib/src/query.c | 138 +++++++++++++++++++++--------------- 3 files changed, 125 insertions(+), 62 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index cf2eb1432b..62cc9b62e5 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -160,7 +160,9 @@ impl Loader { // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. else { - let file_contents = fs::read_to_string(path)?; + let file_contents = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?; + let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; for configuration_id in configuration_ids { diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 493bea8a45..c304f3b4a9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1135,6 +1135,43 @@ fn test_query_matches_with_too_many_permutations_to_track() { }); } +#[test] +fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + ( + (comment) @doc + ; not immediate + (class_declaration) @class + ) + + (call_expression + function: [ + (identifier) @function + (member_expression property: (property_identifier) @method) + ]) + ", + ) + .unwrap(); + + let source = "/* hi */ a.b(); ".repeat(50); + + let mut parser = 
Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); + + assert_eq!( + collect_matches(matches, &query, source.as_str()), + vec![(1, vec![("method", "b")]); 50], + ); + }); +} + #[test] fn test_query_matches_with_anonymous_tokens() { allocations::record(|| { @@ -2091,13 +2128,17 @@ fn test_query_disable_pattern() { fn test_query_alternative_predicate_prefix() { allocations::record(|| { let language = get_language("c"); - let query = Query::new(language, r#" + let query = Query::new( + language, + r#" ((call_expression function: (identifier) @keyword arguments: (argument_list (string_literal) @function)) (.eq? @keyword "DEFUN")) - "#).unwrap(); + "#, + ) + .unwrap(); let source = r#" DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, doc: /* Return the argument unchanged. */ diff --git a/lib/src/query.c b/lib/src/query.c index acce2c72fc..05c767e184 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -122,6 +122,7 @@ typedef struct { uint16_t consumed_capture_count: 14; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; + bool dead: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -1365,6 +1366,7 @@ static bool ts_query_cursor__first_in_progress_capture( *pattern_index = UINT32_MAX; for (unsigned i = 0; i < self->states.size; i++) { const QueryState *state = &self->states.contents[i]; + if (state->dead) continue; const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id @@ -1480,44 +1482,88 @@ static bool ts_query_cursor__add_state( .start_depth = self->depth - step->depth, .consumed_capture_count = 0, .seeking_immediate_match = false, + .has_in_progress_alternatives = false, + .dead = false, })); return true; } +// Acquire a capture list for this state. 
If there are no capture lists left in the +// pool, this will steal the capture list from another existing state, and mark that +// other state as 'dead'. +static CaptureList *ts_query_cursor__prepare_to_capture( + TSQueryCursor *self, + QueryState *state, + unsigned state_index_to_preserve +) { + if (state->capture_list_id == NONE) { + state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); + + // If there are no capture lists left in the pool, then terminate whichever + // state has captured the earliest node in the document, and steal its + // capture list. + if (state->capture_list_id == NONE) { + uint32_t state_index, byte_offset, pattern_index; + if ( + ts_query_cursor__first_in_progress_capture( + self, + &state_index, + &byte_offset, + &pattern_index + ) && + state_index != state_index_to_preserve + ) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + state_index, pattern_index, byte_offset + ); + QueryState *other_state = &self->states.contents[state_index]; + state->capture_list_id = other_state->capture_list_id; + other_state->capture_list_id = NONE; + other_state->dead = true; + CaptureList *list = capture_list_pool_get_mut( + &self->capture_list_pool, + state->capture_list_id + ); + array_clear(list); + return list; + } else { + LOG(" ran out of capture lists"); + return NULL; + } + } + } + return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); +} + // Duplicate the given state and insert the newly-created state immediately after // the given state in the `states` array. -static QueryState *ts_query__cursor_copy_state( +static QueryState *ts_query_cursor__copy_state( TSQueryCursor *self, - const QueryState *state + unsigned state_index ) { if (self->states.size >= MAX_STATE_COUNT) { LOG(" too many states"); return NULL; } - // If the state has captures, copy its capture list. 
+ const QueryState *state = &self->states.contents[state_index]; QueryState copy = *state; - copy.capture_list_id = state->capture_list_id; + copy.capture_list_id = NONE; + + // If the state has captures, copy its capture list. if (state->capture_list_id != NONE) { - copy.capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - if (copy.capture_list_id == NONE) { - LOG(" too many capture lists"); - return NULL; - } + CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); + if (!new_captures) return NULL; const CaptureList *old_captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - CaptureList *new_captures = capture_list_pool_get_mut( - &self->capture_list_pool, - copy.capture_list_id - ); array_push_all(new_captures, old_captures); } - uint32_t index = (state - self->states.contents) + 1; - array_insert(&self->states, index, copy); - return &self->states.contents[index]; + array_insert(&self->states, state_index + 1, copy); + return &self->states.contents[state_index + 1]; } // Walk the tree, processing patterns until at least one pattern finishes, @@ -1728,7 +1774,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { !step->is_pattern_start && step->contains_captures ) { - if (ts_query__cursor_copy_state(self, state)) { + if (ts_query_cursor__copy_state(self, i)) { LOG( " split state for capture. pattern:%u, step:%u\n", state->pattern_index, @@ -1739,45 +1785,14 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // If the current node is captured in this pattern, add it to the capture list. - // For the first capture in a pattern, lazily acquire a capture list. 
if (step->capture_ids[0] != NONE) { - if (state->capture_list_id == NONE) { - state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - - // If there are no capture lists left in the pool, then terminate whichever - // state has captured the earliest node in the document, and steal its - // capture list. - if (state->capture_list_id == NONE) { - uint32_t state_index, byte_offset, pattern_index; - if (ts_query_cursor__first_in_progress_capture( - self, - &state_index, - &byte_offset, - &pattern_index - )) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - state_index, pattern_index, byte_offset - ); - state->capture_list_id = self->states.contents[state_index].capture_list_id; - array_erase(&self->states, state_index); - if (state_index < i) { - i--; - state--; - } - } else { - LOG(" too many finished states.\n"); - array_erase(&self->states, i); - i--; - continue; - } - } + CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); + if (!capture_list) { + array_erase(&self->states, i); + i--; + continue; } - CaptureList *capture_list = capture_list_pool_get_mut( - &self->capture_list_pool, - state->capture_list_id - ); for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { uint16_t capture_id = step->capture_ids[j]; if (step->capture_ids[j] == NONE) break; @@ -1800,10 +1815,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->step_index ); - // If this state's next step has an 'alternative' step (the step is either optional, - // or is the end of a repetition), then copy the state in order to pursue both - // alternatives. The alternative step itself may have an alternative, so this is - // an interative process. + // If this state's next step has an alternative step, then copy the state in order + // to pursue both alternatives. The alternative step itself may have an alternative, + // so this is an interative process. 
unsigned end_index = i + 1; for (unsigned j = i; j < end_index; j++) { QueryState *state = &self->states.contents[j]; @@ -1815,7 +1829,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query__cursor_copy_state(self, state); + QueryState *copy = ts_query_cursor__copy_state(self, j); if (next_step->is_pass_through) { state->step_index++; j--; @@ -1841,14 +1855,20 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { for (unsigned i = 0; i < self->states.size; i++) { QueryState *state = &self->states.contents[i]; - bool did_remove = false; + if (state->dead) { + array_erase(&self->states, i); + i--; + continue; + } // Enfore the longest-match criteria. When a query pattern contains optional or // repeated nodes, this is necesssary to avoid multiple redundant states, where // one state has a strict subset of another state's captures. + bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; if ( + !other_state->dead && state->pattern_index == other_state->pattern_index && state->start_depth == other_state->start_depth ) { From 32099050d6d41ff9538c4f7c4991b66254cad024 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 24 Jul 2020 09:26:54 -0700 Subject: [PATCH 120/282] node_types: Fix panic when field is associated with a hidden token Fixes #695 --- cli/src/generate/node_types.rs | 35 +++++++++++++++++++++++++++++++++- cli/src/main.rs | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 039d7190c9..7a5768a5bc 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -216,7 +216,7 @@ pub(crate) fn get_variable_info( .or_insert(ChildQuantity::zero()); // Inherit the types and quantities of hidden children associated with fields. 
- if child_is_hidden { + if child_is_hidden && child_symbol.is_non_terminal() { let child_variable_info = &result[child_symbol.index]; did_change |= extend_sorted( &mut field_info.types, @@ -352,6 +352,7 @@ pub(crate) fn get_variable_info( for (_, field_info) in variable_info.fields.iter_mut() { field_info.types.retain(child_type_is_visible); } + variable_info.fields.retain(|_, v| !v.types.is_empty()); variable_info .children_without_fields .types @@ -1174,6 +1175,38 @@ mod tests { ); } + #[test] + fn test_node_types_with_fields_on_hidden_tokens() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![Variable { + name: "script".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("a".to_string(), Rule::pattern("hi")), + Rule::field("b".to_string(), Rule::pattern("bye")), + ]), + }], + }); + + assert_eq!( + node_types, + [NodeInfoJSON { + kind: "script".to_string(), + named: true, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None + }] + ); + } + #[test] fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { diff --git a/cli/src/main.rs b/cli/src/main.rs index 0668d08dc3..2f8c6dd544 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -99,7 +99,7 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("tags") .arg(Arg::with_name("quiet").long("quiet").short("q")) - .arg(Arg::with_name("time").long("quiet").short("t")) + .arg(Arg::with_name("time").long("time").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( From 1ae5cbc851ca55214a59e675240cd2dfd1efb276 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 24 Jul 2020 10:49:20 -0700 Subject: [PATCH 
121/282] query: Handle #not-match? in rust, wasm bindings --- cli/src/tests/query_test.rs | 9 ++++++++- lib/binding_rust/lib.rs | 10 ++++++---- lib/binding_web/binding.js | 4 +++- lib/binding_web/test/query-test.js | 9 ++++++++- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c304f3b4a9..914d41cd0b 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1470,12 +1470,17 @@ fn test_query_captures_with_text_conditions() { ((identifier) @function.builtin (#eq? @function.builtin "require")) - (identifier) @variable + ((identifier) @variable + (#not-match? @variable "^(lambda|load)$")) "#, ) .unwrap(); let source = " + toad + load + panda + lambda const ab = require('./ab'); new Cd(EF); "; @@ -1489,6 +1494,8 @@ fn test_query_captures_with_text_conditions() { assert_eq!( collect_captures(captures, &query, source), &[ + ("variable", "toad"), + ("variable", "panda"), ("variable", "ab"), ("function.builtin", "require"), ("variable", "require"), diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c0aba32f01..ec7cd79116 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -169,7 +169,7 @@ pub enum QueryError { enum TextPredicate { CaptureEqString(u32, String, bool), CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex), + CaptureMatchString(u32, regex::bytes::Regex, bool), } impl Language { @@ -1298,7 +1298,7 @@ impl Query { }); } - "match?" => { + "match?" | "not-match?" => { if p.len() != 3 { return Err(QueryError::Predicate(format!( "Wrong number of arguments to #match? predicate. 
Expected 2, got {}.", @@ -1318,12 +1318,14 @@ impl Query { ))); } + let is_positive = operator_name == "match?"; let regex = &string_values[p[2].value_id as usize]; text_predicates.push(TextPredicate::CaptureMatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { QueryError::Predicate(format!("Invalid regex '{}'", regex)) })?, + is_positive, )); } @@ -1607,9 +1609,9 @@ impl<'a> QueryMatch<'a> { let node = self.capture_for_index(*i).unwrap(); (text_callback(node).as_ref() == s.as_bytes()) == *is_positive } - TextPredicate::CaptureMatchString(i, r) => { + TextPredicate::CaptureMatchString(i, r, is_positive) => { let node = self.capture_for_index(*i).unwrap(); - r.is_match(text_callback(node).as_ref()) + r.is_match(text_callback(node).as_ref()) == *is_positive } }) } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 567b7eb317..3a193ef98e 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -784,6 +784,8 @@ class Language { } break; + case 'not-match?': + isPositive = false; case 'match?': if (steps.length !== 3) throw new Error( `Wrong number of arguments to \`#match?\` predicate. 
Expected 2, got ${steps.length - 1}.` @@ -798,7 +800,7 @@ class Language { const regex = new RegExp(steps[2].value); textPredicates[i].push(function(captures) { for (const c of captures) { - if (c.name === captureName) return regex.test(c.node.text); + if (c.name === captureName) return regex.test(c.node.text) === isPositive; } return false; }); diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 9dda983489..9d1e24e159 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -126,12 +126,17 @@ describe("Query", () => { it("handles conditions that compare the text of capture to literal strings", () => { tree = parser.parse(` + lambda + panda + load + toad const ab = require('./ab'); new Cd(EF); `); query = JavaScript.query(` - (identifier) @variable + ((identifier) @variable + (#not-match? @variable "^(lambda|load)$")) ((identifier) @function.builtin (#eq? @function.builtin "require")) @@ -145,6 +150,8 @@ describe("Query", () => { const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ + { name: "variable", text: "panda" }, + { name: "variable", text: "toad" }, { name: "variable", text: "ab" }, { name: "variable", text: "require" }, { name: "function.builtin", text: "require" }, From d22240591c2accdc94de466f7352ee56c399a796 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 27 Jul 2020 17:38:32 -0500 Subject: [PATCH 122/282] Docs: document the `set!` predicate I was looking for something like this, I searched the documentation, but I found it in https://github.com/tree-sitter/tree-sitter-javascript/blob/master/queries/injections.scm#L15 --- docs/section-4-syntax-highlighting.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md index 0182704b5d..cbf97b664e 100644 --- a/docs/section-4-syntax-highlighting.md +++ b/docs/section-4-syntax-highlighting.md @@ -385,6 +385,14 
@@ The following query would specify that the contents of the heredoc should be par (heredoc_end) @injection.language) @injection.content ``` +You can also force the language using the `#set!` predicate. +For example, this will force the language to be always `ruby`. + +``` +((heredoc_body) @injection.content + (#set! injection.language "ruby")) +``` + ## Unit Testing Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting. From 253f23c3d432d75cbb2b4c53f5ca090c1e46ae72 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 28 Jul 2020 13:30:34 -0700 Subject: [PATCH 123/282] Fix error when parse error occurs after non-terminal extra --- lib/src/parser.c | 49 +++++++++++++--------- test/fixtures/error_corpus/ruby_errors.txt | 19 +++++++++ 2 files changed, 48 insertions(+), 20 deletions(-) create mode 100644 test/fixtures/error_corpus/ruby_errors.txt diff --git a/lib/src/parser.c b/lib/src/parser.c index 4d7dc1e50c..035672b8d7 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -355,10 +355,11 @@ static Subtree ts_parser__lex( StackVersion version, TSStateId parse_state ) { - Length start_position = ts_stack_position(self->stack, version); - Subtree external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE; + + Length start_position = ts_stack_position(self->stack, version); + Subtree external_token = ts_stack_last_external_token(self->stack, version); const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, lex_mode.external_lex_state @@ -1345,24 +1346,26 @@ static bool ts_parser__advance( ); } -lex: - // Otherwise, re-run the lexer. 
- if (!lookahead.ptr) { - lookahead = ts_parser__lex(self, version, state); - if (lookahead.ptr) { - ts_parser__set_cached_token(self, position, last_external_token, lookahead); - ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); - } + bool needs_lex = !lookahead.ptr; + for (;;) { + // Otherwise, re-run the lexer. + if (needs_lex) { + needs_lex = false; + lookahead = ts_parser__lex(self, version, state); + + if (lookahead.ptr) { + ts_parser__set_cached_token(self, position, last_external_token, lookahead); + ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + } - // When parsing a non-terminal extra, a null lookahead indicates the - // end of the rule. The reduction is stored in the EOF table entry. - // After the reduction, the lexer needs to be run again. - else { - ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. + else { + ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + } } - } - for (;;) { // If a cancellation flag or a timeout was provided, then check every // time a fixed number of parse actions has been processed. if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { @@ -1459,8 +1462,10 @@ static bool ts_parser__advance( // (and completing the non-terminal extra rule) run the lexer again based // on the current parse state. 
if (!lookahead.ptr) { - lookahead = ts_parser__lex(self, version, state); + needs_lex = true; + continue; } + ts_language_table_entry( self->language, state, @@ -1470,6 +1475,11 @@ static bool ts_parser__advance( continue; } + if (!lookahead.ptr) { + ts_stack_pause(self->stack, version, ts_builtin_sym_end); + return true; + } + // If there were no parse actions for the current lookahead token, then // it is not valid in this state. If the current lookahead token is a // keyword, then switch to treating it as the normal word token if that @@ -1509,8 +1519,7 @@ static bool ts_parser__advance( if (ts_parser__breakdown_top_of_stack(self, version)) { state = ts_stack_state(self->stack, version); ts_subtree_release(&self->tree_pool, lookahead); - lookahead = NULL_SUBTREE; - goto lex; + needs_lex = true; continue; } diff --git a/test/fixtures/error_corpus/ruby_errors.txt b/test/fixtures/error_corpus/ruby_errors.txt new file mode 100644 index 0000000000..9c35781c03 --- /dev/null +++ b/test/fixtures/error_corpus/ruby_errors.txt @@ -0,0 +1,19 @@ +========================== +Heredocs with errors +========================== + +joins(<<~SQL( + b +SQL +c + +--- + +(program + (method_call + method: (identifier) + (ERROR (heredoc_beginning)) + arguments: (argument_list + (heredoc_body (heredoc_end)) + (identifier) + (MISSING ")")))) From 81bbdf19f4dc42f5f30c589b3ed449b6150de3de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 09:50:13 -0700 Subject: [PATCH 124/282] Fix handling of non-terminal extras that share non-extra rules Fixes #701 --- .../generate/build_tables/minimize_parse_table.rs | 3 +++ lib/src/parser.c | 15 ++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5d8f7f0fb2..aa4801c8af 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ 
-199,6 +199,9 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec, ) -> bool { + if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { + return true; + } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( diff --git a/lib/src/parser.c b/lib/src/parser.c index 035672b8d7..37d1a1c2fd 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -356,7 +356,10 @@ static Subtree ts_parser__lex( TSStateId parse_state ) { TSLexMode lex_mode = self->language->lex_modes[parse_state]; - if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE; + if (lex_mode.lex_state == (uint16_t)-1) { + LOG("no_lookahead_after_non_terminal_extra"); + return NULL_SUBTREE; + } Length start_position = ts_stack_position(self->stack, version); Subtree external_token = ts_stack_last_external_token(self->stack, version); @@ -762,7 +765,7 @@ static StackVersion ts_parser__reduce( int dynamic_precedence, uint16_t production_id, bool is_fragile, - bool is_extra + bool end_of_non_terminal_extra ) { uint32_t initial_version_count = ts_stack_version_count(self->stack); @@ -833,7 +836,9 @@ static StackVersion ts_parser__reduce( TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); - if (is_extra) parent.ptr->extra = true; + if (end_of_non_terminal_extra && next_state == state) { + parent.ptr->extra = true; + } if (is_fragile || pop.size > 1 || initial_version_count > 1) { parent.ptr->fragile_left = true; parent.ptr->fragile_right = true; @@ -1417,12 +1422,12 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; - bool is_extra = lookahead.ptr == NULL; + bool end_of_non_terminal_extra = lookahead.ptr == NULL; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), 
action.params.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.reduce.symbol, action.params.reduce.child_count, action.params.reduce.dynamic_precedence, action.params.reduce.production_id, - is_fragile, is_extra + is_fragile, end_of_non_terminal_extra ); if (reduction_version != STACK_VERSION_NONE) { last_reduction_version = reduction_version; From 9a7fdd29c263a1fa7778c7ec1cbc812397d88571 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 09:53:07 -0700 Subject: [PATCH 125/282] Add test for non-terminal extras that share non-extra rules --- .../corpus.txt | 23 +++++++ .../grammar.json | 68 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt create mode 100644 test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json diff --git a/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt new file mode 100644 index 0000000000..a22d8b8d0f --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt @@ -0,0 +1,23 @@ +===== +Extras +===== + +; +%; +%foo:; +; +bar: baz:; +; + +--- + +(program + (statement) + (macro_statement (statement)) + (macro_statement (statement + (label_declaration (identifier)))) + (statement) + (statement + (label_declaration (identifier)) + (label_declaration (identifier))) + (statement)) diff --git a/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json new file mode 100644 index 0000000000..a7f51b8eed --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.json @@ -0,0 +1,68 @@ +{ + "name": "extra_non_terminals_with_shared_rules", + + "extras": [ + { "type": "PATTERN", "value": "\\s+" 
}, + { "type": "SYMBOL", "name": "macro_statement" } + ], + + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "statement" + } + }, + "statement": { + "type": "SEQ", + "members": [ + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "label_declaration" + } + }, + { + "type": "STRING", + "value": ";" + } + ] + }, + "macro_statement": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "%" + }, + { + "type": "SYMBOL", + "name": "statement" + } + ] + }, + "label_declaration": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "identifier" + }, + { + "type": "STRING", + "value": ":" + } + ] + }, + "identifier": { + "type": "PATTERN", + "value": "[a-zA-Z]+" + } + }, + "conflicts": [], + "externals": [], + "inline": [], + "supertypes": [] +} From 4ec7d8096853b1b478da3588206eb2a29559efa9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 10:04:05 -0700 Subject: [PATCH 126/282] Mention rule order as the fallback criteria in docs Fixes #702 --- docs/section-3-creating-parsers.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index b075e48873..694f8daeae 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -505,6 +505,8 @@ Grammars often contain multiple tokens that can match the same characters. For e 4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. +5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. + ### Keywords Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. 
`identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this: From e89a19a1588382c24ca807c7e43520efe60e311a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Jul 2020 15:30:13 -0700 Subject: [PATCH 127/282] tags: Add @ignore capture --- cli/src/tests/tags_test.rs | 8 +- tags/src/lib.rs | 252 +++++++++++++++++++++---------------- 2 files changed, 152 insertions(+), 108 deletions(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index f00e83ac2c..3ff1c92bd0 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -68,11 +68,13 @@ const JS_TAG_QUERY: &'static str = r#" const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @definition.method + name: (_) @name) @definition.method (method_call method: (identifier) @name) @reference.call +(setter (identifier) @ignore) + ((identifier) @name @reference.call (#is-not? 
local)) "#; @@ -207,7 +209,7 @@ fn test_tags_ruby() { " b = 1 - def foo() + def foo=() c = 1 # a is a method because it is not in scope @@ -239,7 +241,7 @@ fn test_tags_ruby() { )) .collect::>(), &[ - ("foo", "method", (2, 4)), + ("foo=", "method", (2, 4)), ("bar", "call", (7, 4)), ("a", "call", (7, 8)), ("b", "call", (7, 11)), diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 7733f3e3b8..07209e4de0 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -25,6 +25,7 @@ pub struct TagsConfiguration { capture_map: HashMap, doc_capture_index: Option, name_capture_index: Option, + ignore_capture_index: Option, local_scope_capture_index: Option, local_definition_capture_index: Option, tags_pattern_index: usize, @@ -128,12 +129,14 @@ impl TagsConfiguration { let mut syntax_type_names = Vec::new(); let mut doc_capture_index = None; let mut name_capture_index = None; + let mut ignore_capture_index = None; let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { match name.as_str() { "" => continue, "name" => name_capture_index = Some(i as u32), + "ignore" => ignore_capture_index = Some(i as u32), "doc" => doc_capture_index = Some(i as u32), "local.scope" => local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), @@ -222,6 +225,7 @@ impl TagsConfiguration { capture_map, doc_capture_index, name_capture_index, + ignore_capture_index, tags_pattern_index, local_scope_capture_index, local_definition_capture_index, @@ -311,7 +315,12 @@ where if self.tag_queue.len() > 1 && self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start { - return Some(Ok(self.tag_queue.remove(0).0)); + let tag = self.tag_queue.remove(0).0; + if tag.is_ignored() { + continue; + } else { + return Some(Ok(tag)); + } } } @@ -350,10 +359,16 @@ where let mut syntax_type_id = 0; let mut is_definition = false; let mut docs_adjacent_node = None; + let 
mut is_ignored = false; for capture in mat.captures { let index = Some(capture.index); + if index == self.config.ignore_capture_index { + is_ignored = true; + name_node = Some(capture.node); + } + if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture { docs_adjacent_node = Some(capture.node); } @@ -371,129 +386,137 @@ where } } - if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) { - if name_node.has_error() { - continue; - } - + if let Some(name_node) = name_node { let name_range = name_node.byte_range(); - if pattern_info.name_must_be_non_local { - let mut is_local = false; - for scope in self.scopes.iter().rev() { - if scope.range.start <= name_range.start - && scope.range.end >= name_range.end - { - if scope - .local_defs - .iter() - .any(|d| d.name == &self.source[name_range.clone()]) + let tag; + if let Some(tag_node) = tag_node { + if name_node.has_error() { + continue; + } + + if pattern_info.name_must_be_non_local { + let mut is_local = false; + for scope in self.scopes.iter().rev() { + if scope.range.start <= name_range.start + && scope.range.end >= name_range.end { - is_local = true; - break; - } - if !scope.inherits { - break; + if scope + .local_defs + .iter() + .any(|d| d.name == &self.source[name_range.clone()]) + { + is_local = true; + break; + } + if !scope.inherits { + break; + } } } + if is_local { + continue; + } } - if is_local { - continue; - } - } - // If needed, filter the doc nodes based on their ranges, selecting - // only the slice that are adjacent to some specified node. 
- let mut docs_start_index = 0; - if let (Some(docs_adjacent_node), false) = - (docs_adjacent_node, doc_nodes.is_empty()) - { - docs_start_index = doc_nodes.len(); - let mut start_row = docs_adjacent_node.start_position().row; - while docs_start_index > 0 { - let doc_node = &doc_nodes[docs_start_index - 1]; - let prev_doc_end_row = doc_node.end_position().row; - if prev_doc_end_row + 1 >= start_row { - docs_start_index -= 1; - start_row = doc_node.start_position().row; - } else { - break; + // If needed, filter the doc nodes based on their ranges, selecting + // only the slice that are adjacent to some specified node. + let mut docs_start_index = 0; + if let (Some(docs_adjacent_node), false) = + (docs_adjacent_node, doc_nodes.is_empty()) + { + docs_start_index = doc_nodes.len(); + let mut start_row = docs_adjacent_node.start_position().row; + while docs_start_index > 0 { + let doc_node = &doc_nodes[docs_start_index - 1]; + let prev_doc_end_row = doc_node.end_position().row; + if prev_doc_end_row + 1 >= start_row { + docs_start_index -= 1; + start_row = doc_node.start_position().row; + } else { + break; + } } } - } - // Generate a doc string from all of the doc nodes, applying any strip regexes. - let mut docs = None; - for doc_node in &doc_nodes[docs_start_index..] { - if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) { - let content = if let Some(regex) = &pattern_info.doc_strip_regex { - regex.replace_all(content, "").to_string() - } else { - content.to_string() - }; - match &mut docs { - None => docs = Some(content), - Some(d) => { - d.push('\n'); - d.push_str(&content); + // Generate a doc string from all of the doc nodes, applying any strip regexes. + let mut docs = None; + for doc_node in &doc_nodes[docs_start_index..] 
{ + if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) + { + let content = if let Some(regex) = &pattern_info.doc_strip_regex { + regex.replace_all(content, "").to_string() + } else { + content.to_string() + }; + match &mut docs { + None => docs = Some(content), + Some(d) => { + d.push('\n'); + d.push_str(&content); + } } } } - } - let range = tag_node.byte_range(); - let span = name_node.start_position()..name_node.end_position(); - - // Compute tag properties that depend on the text of the containing line. If the - // previous tag occurred on the same line, then reuse results from the previous tag. - let line_range; - let mut prev_utf16_column = 0; - let mut prev_utf8_byte = name_range.start - span.start.column; - let line_info = self.prev_line_info.as_ref().and_then(|info| { - if info.utf8_position.row == span.start.row { - Some(info) + let range = tag_node.byte_range(); + let span = name_node.start_position()..name_node.end_position(); + + // Compute tag properties that depend on the text of the containing line. If the + // previous tag occurred on the same line, then reuse results from the previous tag. 
+ let line_range; + let mut prev_utf16_column = 0; + let mut prev_utf8_byte = name_range.start - span.start.column; + let line_info = self.prev_line_info.as_ref().and_then(|info| { + if info.utf8_position.row == span.start.row { + Some(info) + } else { + None + } + }); + if let Some(line_info) = line_info { + line_range = line_info.line_range.clone(); + if line_info.utf8_position.column <= span.start.column { + prev_utf8_byte = line_info.utf8_byte; + prev_utf16_column = line_info.utf16_column; + } } else { - None - } - }); - if let Some(line_info) = line_info { - line_range = line_info.line_range.clone(); - if line_info.utf8_position.column <= span.start.column { - prev_utf8_byte = line_info.utf8_byte; - prev_utf16_column = line_info.utf16_column; + line_range = self::line_range( + self.source, + name_range.start, + span.start, + MAX_LINE_LEN, + ); } + + let utf16_start_column = prev_utf16_column + + utf16_len(&self.source[prev_utf8_byte..name_range.start]); + let utf16_end_column = + utf16_start_column + utf16_len(&self.source[name_range.clone()]); + let utf16_column_range = utf16_start_column..utf16_end_column; + + self.prev_line_info = Some(LineInfo { + utf8_position: span.end, + utf8_byte: name_range.end, + utf16_column: utf16_end_column, + line_range: line_range.clone(), + }); + tag = Tag { + line_range, + span, + utf16_column_range, + range, + name_range, + docs, + is_definition, + syntax_type_id, + }; + } else if is_ignored { + tag = Tag::ignored(name_range); } else { - line_range = self::line_range( - self.source, - name_range.start, - span.start, - MAX_LINE_LEN, - ); + continue; } - let utf16_start_column = prev_utf16_column - + utf16_len(&self.source[prev_utf8_byte..name_range.start]); - let utf16_end_column = - utf16_start_column + utf16_len(&self.source[name_range.clone()]); - let utf16_column_range = utf16_start_column..utf16_end_column; - - self.prev_line_info = Some(LineInfo { - utf8_position: span.end, - utf8_byte: name_range.end, - utf16_column: 
utf16_end_column, - line_range: line_range.clone(), - }); - let tag = Tag { - line_range, - span, - utf16_column_range, - range, - name_range, - docs, - is_definition, - syntax_type_id, - }; - // Only create one tag per node. The tag queue is sorted by node position // to allow for fast lookup. match self.tag_queue.binary_search_by_key( @@ -521,6 +544,25 @@ where } } +impl Tag { + fn ignored(name_range: Range) -> Self { + Tag { + name_range, + line_range: 0..0, + span: Point::new(0, 0)..Point::new(0, 0), + utf16_column_range: 0..0, + range: usize::MAX..usize::MAX, + docs: None, + is_definition: false, + syntax_type_id: 0, + } + } + + fn is_ignored(&self) -> bool { + self.range.start == usize::MAX + } +} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { From df5510acfc0561e64fc2a89fc21ec286eda4feb4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 12:59:34 -0700 Subject: [PATCH 128/282] query: Remove limit on number of in-progress states --- lib/src/query.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 05c767e184..15827cd750 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -11,7 +11,6 @@ // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) -#define MAX_STATE_COUNT 256 #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 @@ -1297,8 +1296,8 @@ TSQueryCursor *ts_query_cursor_new(void) { .start_point = {0, 0}, .end_point = POINT_MAX, }; - array_reserve(&self->states, MAX_STATE_COUNT); - array_reserve(&self->finished_states, MAX_CAPTURE_LIST_COUNT); + array_reserve(&self->states, 8); + array_reserve(&self->finished_states, 8); return self; } @@ -1465,10 +1464,6 @@ static bool ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { - if (self->states.size >= MAX_STATE_COUNT) { - LOG(" too many states"); - return false; - } LOG( " start state. 
pattern:%u, step:%u\n", pattern->pattern_index, @@ -1537,17 +1532,14 @@ static CaptureList *ts_query_cursor__prepare_to_capture( } // Duplicate the given state and insert the newly-created state immediately after -// the given state in the `states` array. +// the given state in the `states` array. Ensures that the given state reference is +// still valid, even if the states array is reallocated. static QueryState *ts_query_cursor__copy_state( TSQueryCursor *self, - unsigned state_index + QueryState **state_ref ) { - if (self->states.size >= MAX_STATE_COUNT) { - LOG(" too many states"); - return NULL; - } - - const QueryState *state = &self->states.contents[state_index]; + const QueryState *state = *state_ref; + uint32_t state_index = state - self->states.contents; QueryState copy = *state; copy.capture_list_id = NONE; @@ -1563,6 +1555,7 @@ static QueryState *ts_query_cursor__copy_state( } array_insert(&self->states, state_index + 1, copy); + *state_ref = &self->states.contents[state_index]; return &self->states.contents[state_index + 1]; } @@ -1774,7 +1767,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { !step->is_pattern_start && step->contains_captures ) { - if (ts_query_cursor__copy_state(self, i)) { + if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", state->pattern_index, @@ -1829,7 +1822,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query_cursor__copy_state(self, j); + QueryState *copy = ts_query_cursor__copy_state(self, &state); if (next_step->is_pass_through) { state->step_index++; j--; @@ -1862,7 +1855,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } // Enfore the longest-match criteria. 
When a query pattern contains optional or - // repeated nodes, this is necesssary to avoid multiple redundant states, where + // repeated nodes, this is necessary to avoid multiple redundant states, where // one state has a strict subset of another state's captures. bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { From 411f69d13be8954baff074f4180ae4fdb5537453 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 13:34:34 -0700 Subject: [PATCH 129/282] query: Optimize 'longest-match' filtering --- lib/src/query.c | 85 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 15827cd750..c839c29978 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -118,7 +118,7 @@ typedef struct { uint16_t step_index; uint16_t pattern_index; uint16_t capture_list_id; - uint16_t consumed_capture_count: 14; + uint16_t consumed_capture_count: 12; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; bool dead: 1; @@ -1860,47 +1860,54 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; + if (other_state->dead) { + array_erase(&self->states, j); + j--; + continue; + } + + // When query states are copied in order if ( - !other_state->dead && - state->pattern_index == other_state->pattern_index && - state->start_depth == other_state->start_depth - ) { - bool left_contains_right, right_contains_left; - ts_query_cursor__compare_captures( - self, - state, - other_state, - &left_contains_right, - &right_contains_left - ); - if (left_contains_right) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. 
pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, j); - j--; - continue; - } - other_state->has_in_progress_alternatives = true; + other_state->start_depth != state->start_depth || + other_state->pattern_index != state->pattern_index + ) break; + + bool left_contains_right, right_contains_left; + ts_query_cursor__compare_captures( + self, + state, + other_state, + &left_contains_right, + &right_contains_left + ); + if (left_contains_right) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); + array_erase(&self->states, j); + j--; + continue; } - if (right_contains_left) { - if (state->step_index == other_state->step_index) { - LOG( - " drop shorter state. pattern: %u, step_index: %u\n", - state->pattern_index, - state->step_index - ); - capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, i); - did_remove = true; - break; - } - state->has_in_progress_alternatives = true; + other_state->has_in_progress_alternatives = true; + } + if (right_contains_left) { + if (state->step_index == other_state->step_index) { + LOG( + " drop shorter state. 
pattern: %u, step_index: %u\n", + state->pattern_index, + state->step_index + ); + capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); + array_erase(&self->states, i); + i--; + did_remove = true; + break; } + state->has_in_progress_alternatives = true; } } From f265e63d488d14e06d905b2ddabe879afdb62945 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 30 Jul 2020 13:35:04 -0700 Subject: [PATCH 130/282] tags: Allow def or ref node to be a sibling of the name node --- tags/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 07209e4de0..c247c13ebc 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -459,7 +459,8 @@ where } } - let range = tag_node.byte_range(); + let rng = tag_node.byte_range(); + let range = rng.start.min(name_range.start)..rng.end.max(name_range.end); let span = name_node.start_position()..name_node.end_position(); // Compute tag properties that depend on the text of the containing line. 
If the From af655547e5817efbdf350935555b4aaf2642c618 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 31 Jul 2020 12:47:06 -0700 Subject: [PATCH 131/282] Fix handling of queries with many patterns with leading repetitions --- cli/src/tests/query_test.rs | 53 ++++++++++++++++- lib/src/query.c | 115 ++++++++++++++++++++++++++---------- 2 files changed, 135 insertions(+), 33 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 914d41cd0b..a377ca517c 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -382,7 +382,7 @@ fn test_query_matches_with_many_overlapping_results() { ) .unwrap(); - let count = 80; + let count = 1024; // Deeply nested chained function calls: // a @@ -547,8 +547,8 @@ fn test_query_matches_with_immediate_siblings() { &[ (0, vec![("parent", "a"), ("child", "b")]), (0, vec![("parent", "b"), ("child", "c")]), - (1, vec![("last-child", "d")]), (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), (2, vec![("first-element", "w")]), (2, vec![("first-element", "1")]), ], @@ -732,6 +732,55 @@ fn test_query_matches_with_nested_repetitions() { }); } +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. 
+ let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. + let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::>(), + ); + }); +} + #[test] fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index c839c29978..8c8bd4c354 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -48,7 +48,6 @@ typedef struct { uint16_t alternative_index; uint16_t depth; bool contains_captures: 1; - bool is_pattern_start: 1; bool is_immediate: 1; bool is_last_child: 1; bool is_pass_through: 1; @@ -449,7 +448,6 @@ static QueryStep query_step__new( .alternative_index = NONE, .contains_captures = false, .is_last_child = false, - .is_pattern_start = false, .is_pass_through = false, .is_dead_end = false, .is_immediate = is_immediate, @@ -547,6 +545,23 @@ static inline void ts_query__pattern_map_insert( ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); + + // Ensure that the entries are sorted not only by symbol, but also + // by pattern_index. 
This way, states for earlier patterns will be + // initiated first, which allows the ordering of the states array + // to be maintained more efficiently. + while (index < self->pattern_map.size) { + PatternEntry *entry = &self->pattern_map.contents[index]; + if ( + self->steps.contents[entry->step_index].symbol == symbol && + entry->pattern_index < pattern_index + ) { + index++; + } else { + break; + } + } + array_insert(&self->pattern_map, index, ((PatternEntry) { .step_index = start_step_index, .pattern_index = pattern_index, @@ -1168,7 +1183,6 @@ TSQuery *ts_query_new( // Maintain a map that can look up patterns for a given root symbol. for (;;) { QueryStep *step = &self->steps.contents[start_step_index]; - step->is_pattern_start = true; ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; @@ -1178,6 +1192,7 @@ TSQuery *ts_query_new( // then add multiple entries to the pattern map. if (step->alternative_index != NONE) { start_step_index = step->alternative_index; + step->alternative_index = NONE; } else { break; } @@ -1460,27 +1475,62 @@ void ts_query_cursor__compare_captures( } } -static bool ts_query_cursor__add_state( +static void ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern ) { + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; + + // Keep the states array in ascending order of start_depth and pattern_index, + // so that it can be processed more efficiently elsewhere. Usually, there is + // no work to do here because of two facts: + // * States with lower start_depth are naturally added first due to the + // order in which nodes are visited. + // * Earlier patterns are naturally added first because of the ordering of the + // pattern_map data structure that's used to initiate matches. 
+ // + // This loop is only needed in cases where two conditions hold: + // * A pattern consists of more than one sibling node, so that its states + // remain in progress after exiting the node that started the match. + // * The first node in the pattern matches against multiple nodes at the + // same depth. + // + // An example of this is the pattern '((comment)* (function))'. If multiple + // `comment` nodes appear in a row, then we may initiate a new state for this + // pattern while another state for the same pattern is already in progress. + // If there are multiple patterns like this in a query, then this loop will + // need to execute in order to keep the states ordered by pattern_index. + uint32_t index = self->states.size; + while (index > 0) { + QueryState *prev_state = &self->states.contents[index - 1]; + if (prev_state->start_depth < start_depth) break; + if (prev_state->start_depth == start_depth) { + if (prev_state->pattern_index < pattern->pattern_index) break; + if (prev_state->pattern_index == pattern->pattern_index) { + // Avoid unnecessarily inserting an unnecessary duplicate state, + // which would be immediately pruned by the longest-match criteria. + if (prev_state->step_index == pattern->step_index) return; + } + } + index--; + } + LOG( " start state. pattern:%u, step:%u\n", pattern->pattern_index, pattern->step_index ); - QueryStep *step = &self->query->steps.contents[pattern->step_index]; - array_push(&self->states, ((QueryState) { + array_insert(&self->states, index, ((QueryState) { .capture_list_id = NONE, .step_index = pattern->step_index, .pattern_index = pattern->pattern_index, - .start_depth = self->depth - step->depth, + .start_depth = start_depth, .consumed_capture_count = 0, - .seeking_immediate_match = false, + .seeking_immediate_match = true, .has_in_progress_alternatives = false, .dead = false, })); - return true; } // Acquire a capture list for this state. 
If there are no capture lists left in the @@ -1682,7 +1732,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); } // Add new states for any patterns whose root node matches this node. @@ -1694,7 +1744,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query_cursor__add_state(self, pattern)) break; + ts_query_cursor__add_state(self, pattern); // Advance to the next pattern whose root node matches this node. i++; @@ -1762,11 +1812,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // parent, then this query state cannot simply be updated in place. It must be // split into two states: one that matches this node, and one which skips over // this node, to preserve the possibility of matching later siblings. - if ( - later_sibling_can_match && - !step->is_pattern_start && - step->contains_captures - ) { + if (later_sibling_can_match && step->contains_captures) { if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", @@ -1822,25 +1868,27 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { continue; } - QueryState *copy = ts_query_cursor__copy_state(self, &state); if (next_step->is_pass_through) { state->step_index++; j--; } + + QueryState *copy = ts_query_cursor__copy_state(self, &state); if (copy) { - copy_count++; + LOG( + " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", + copy->pattern_index, + copy->step_index, + next_step->alternative_index, + next_step->alternative_is_immediate, + capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size + ); end_index++; + copy_count++; copy->step_index = next_step->alternative_index; if (next_step->alternative_is_immediate) { copy->seeking_immediate_match = true; } - LOG( - " split state for branch. pattern:%u, step:%u, step:%u, immediate:%d\n", - copy->pattern_index, - state->step_index, - copy->step_index, - copy->seeking_immediate_match - ); } } } @@ -1860,13 +1908,11 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { bool did_remove = false; for (unsigned j = i + 1; j < self->states.size; j++) { QueryState *other_state = &self->states.contents[j]; - if (other_state->dead) { - array_erase(&self->states, j); - j--; - continue; - } - // When query states are copied in order + // Query states are kept in ascending order of start_depth and pattern_index. + // Since the longest-match criteria is only used for deduping matches of the same + // pattern and root node, we only need to perform pairwise comparisons within a + // small slice of the states array. if ( other_state->start_depth != state->start_depth || other_state->pattern_index != state->pattern_index @@ -1914,6 +1960,13 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If there the state is at the end of its pattern, remove it from the list // of in-progress states and add it to the list of finished states. if (!did_remove) { + LOG( + " keep state. 
pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", + state->pattern_index, + state->start_depth, + state->step_index, + capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size + ); QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->depth == PATTERN_DONE_MARKER) { if (state->has_in_progress_alternatives) { From 1a571ae20877c7bfac1fa59f0cc38027fe669685 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 4 Aug 2020 17:53:47 -0400 Subject: [PATCH 132/282] Add errors_present field to tagging context. --- tags/src/c_lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 07e1e19ae3..b93c69a27d 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -52,6 +52,7 @@ pub struct TSTagsBuffer { context: TagsContext, tags: Vec, docs: Vec, + errors_present: bool, } #[no_mangle] @@ -184,6 +185,7 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { context: TagsContext::new(), tags: Vec::with_capacity(64), docs: Vec::with_capacity(64), + errors_present: false, })) } @@ -216,6 +218,12 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { buffer.docs.len() as u32 } +#[no_mangle] +pub extern "C" fn ts_tagger_errors_present(this: *const TSTagsBuffer) -> bool { + let buffer = unwrap_ptr(this); + buffer.errors_present +} + #[no_mangle] pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, From 5a52dc2cd700170196753481db1e8aa261e50d50 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:18:59 -0400 Subject: [PATCH 133/282] Return an iterator-bool tuple instead of just an iterator. 
--- cli/src/tags.rs | 3 ++- tags/src/c_lib.rs | 5 ++++- tags/src/lib.rs | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 5ea00f39d1..5e99969324 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -53,7 +53,8 @@ pub fn generate_tags( let source = fs::read(path)?; let t0 = Instant::now(); - for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { + let (tagged, _) = context.generate_tags(tags_config, &source, Some(&cancellation_flag))?; + for tag in tagged { let tag = tag?; if !quiet { write!( diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index b93c69a27d..84f8c97bfa 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -126,7 +126,10 @@ pub extern "C" fn ts_tagger_tag( .context .generate_tags(config, source_code, cancellation_flag) { - Ok(tags) => tags, + Ok((tags, found_error)) => { + buffer.errors_present = found_error; + tags + } Err(e) => { return match e { Error::InvalidLanguage => TSTagsError::InvalidLanguage, diff --git a/tags/src/lib.rs b/tags/src/lib.rs index c247c13ebc..dd55d4bea6 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -255,7 +255,7 @@ impl TagsContext { config: &'a TagsConfiguration, source: &'a [u8], cancellation_flag: Option<&'a AtomicUsize>, - ) -> Result> + 'a, Error> { + ) -> Result<(impl Iterator> + 'a, bool), Error> { self.parser .set_language(config.language) .map_err(|_| Error::InvalidLanguage)?; @@ -271,7 +271,7 @@ impl TagsContext { .matches(&config.query, tree_ref.root_node(), move |node| { &source[node.byte_range()] }); - Ok(TagsIter { + Ok((TagsIter { _tree: tree, matches, source, @@ -285,7 +285,7 @@ impl TagsContext { inherits: false, local_defs: Vec::new(), }], - }) + }, tree_ref.root_node().has_error())) } } From f4108056b0b5be57441493a279cb22fc3fd95829 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:33:04 -0400 Subject: [PATCH 134/282] Remove otiose pattern match. 
--- cli/src/tags.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 5e99969324..122b58d252 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -53,8 +53,7 @@ pub fn generate_tags( let source = fs::read(path)?; let t0 = Instant::now(); - let (tagged, _) = context.generate_tags(tags_config, &source, Some(&cancellation_flag))?; - for tag in tagged { + for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 { let tag = tag?; if !quiet { write!( From 5c86a9c654b7f2be39f55039ad114f277aa64a64 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 11:52:07 -0400 Subject: [PATCH 135/282] Fix the tests --- cli/src/tests/tags_test.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 3ff1c92bd0..88e57ec104 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -102,6 +102,7 @@ fn test_tags_python() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -153,6 +154,7 @@ fn test_tags_javascript() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -189,6 +191,7 @@ fn test_tags_columns_measured_in_utf16_code_units() { let tag = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .next() .unwrap() .unwrap(); @@ -229,6 +232,7 @@ fn test_tags_ruby() { let tags = tag_context .generate_tags(&tags_config, source.as_bytes(), None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -271,7 +275,7 @@ fn test_tags_cancellation() { .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) .unwrap(); - for (i, tag) in tags.enumerate() { + for (i, tag) in tags.0.enumerate() { if i == 150 { cancellation_flag.store(1, Ordering::SeqCst); } From 32f69dbe156030de5ae589d968efc2825bd0485f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 
Aug 2020 09:06:00 -0700 Subject: [PATCH 136/282] tags, highlight: Limit the size of buffers that are retained in memory --- highlight/src/lib.rs | 16 ++++++++++++---- tags/src/c_lib.rs | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index bb11021966..1cffefa2d0 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -10,6 +10,8 @@ use tree_sitter::{ }; const CANCELLATION_CHECK_INTERVAL: usize = 100; +const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; +const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; /// Indicates which highlight should be applied to a region of source code. #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -884,11 +886,13 @@ where impl HtmlRenderer { pub fn new() -> Self { - HtmlRenderer { - html: Vec::new(), - line_offsets: vec![0], + let mut result = HtmlRenderer { + html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY), + line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY), carriage_return_highlight: None, - } + }; + result.line_offsets.push(0); + result } pub fn set_carriage_return_highlight(&mut self, highlight: Option) { @@ -896,6 +900,10 @@ impl HtmlRenderer { } pub fn reset(&mut self) { + self.html.truncate(BUFFER_HTML_RESERVE_CAPACITY); + self.line_offsets.truncate(BUFFER_LINES_RESERVE_CAPACITY); + self.html.shrink_to_fit(); + self.line_offsets.shrink_to_fit(); self.html.clear(); self.line_offsets.clear(); self.line_offsets.push(0); diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 07e1e19ae3..c2bec6ca3a 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -6,6 +6,9 @@ use std::sync::atomic::AtomicUsize; use std::{fmt, slice, str}; use tree_sitter::Language; +const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100; +const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024; + #[repr(C)] #[derive(Debug, PartialEq, Eq)] pub enum TSTagsError { @@ -116,8 +119,13 @@ pub extern "C" fn ts_tagger_tag( let scope_name = unsafe { 
unwrap(CStr::from_ptr(scope_name).to_str()) }; if let Some(config) = tagger.languages.get(scope_name) { + buffer.tags.truncate(BUFFER_TAGS_RESERVE_CAPACITY); + buffer.docs.truncate(BUFFER_DOCS_RESERVE_CAPACITY); + buffer.tags.shrink_to_fit(); + buffer.docs.shrink_to_fit(); buffer.tags.clear(); buffer.docs.clear(); + let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; let cancellation_flag = unsafe { cancellation_flag.as_ref() }; @@ -182,8 +190,8 @@ pub extern "C" fn ts_tagger_tag( pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { Box::into_raw(Box::new(TSTagsBuffer { context: TagsContext::new(), - tags: Vec::with_capacity(64), - docs: Vec::with_capacity(64), + tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY), + docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY), })) } From 94ab884ee4d0b965c8c16212979e15927976f068 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:16:09 -0400 Subject: [PATCH 137/282] Add a test. 
--- cli/src/tests/tags_test.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 88e57ec104..2b058c0b4e 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -297,6 +297,39 @@ fn test_invalid_capture() { assert_eq!(e, Error::InvalidCapture("method".to_string())); } +#[test] +fn test_tags_with_parse_error() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Fine: pass + class Bad + "#; + + let (tags, failed) = tag_context + .generate_tags(&tags_config, source, None) + .unwrap(); + + let newtags = tags.collect::, _>>().unwrap(); + + assert!(failed, "syntax error should have been detected"); + + assert_eq!( + newtags.iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[ + ("Fine", "class"), + ] + ); +} + + #[test] fn test_tags_via_c_api() { allocations::record(|| { From 7576b0b4485343902f54ab1dbe0464dd7ef4f920 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:21:42 -0400 Subject: [PATCH 138/282] Add accessor to the C header. --- tags/include/tree_sitter/tags.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index f2b17075d4..42109beee0 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -88,6 +88,9 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); // Get the syntax kinds for a scope. const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); +// Determine whether a parse error was encountered while tagging. 
+bool ts_tagger_errors_present(); + #ifdef __cplusplus } #endif From ec6af791af5761130238134e935ad6236aeb151c Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 12:24:39 -0400 Subject: [PATCH 139/282] Bikeshed this name a little bit. --- tags/include/tree_sitter/tags.h | 2 +- tags/src/c_lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 42109beee0..773113d7d2 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -89,7 +89,7 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); // Determine whether a parse error was encountered while tagging. -bool ts_tagger_errors_present(); +bool ts_tags_buffer_found_parse_error(); #ifdef __cplusplus } diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 84f8c97bfa..8cb5abb447 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -222,7 +222,7 @@ pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { } #[no_mangle] -pub extern "C" fn ts_tagger_errors_present(this: *const TSTagsBuffer) -> bool { +pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { let buffer = unwrap_ptr(this); buffer.errors_present } From f91b19c08947aad20e095a4103cf144794baf16d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 5 Aug 2020 09:57:45 -0700 Subject: [PATCH 140/282] tags, highlight: Avoid completely deallocating buffers when shrinking --- highlight/src/lib.rs | 16 ++++++++++------ tags/src/c_lib.rs | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index 1cffefa2d0..e4aebbfb2b 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -900,12 +900,8 @@ impl HtmlRenderer { } pub fn reset(&mut self) { - 
self.html.truncate(BUFFER_HTML_RESERVE_CAPACITY); - self.line_offsets.truncate(BUFFER_LINES_RESERVE_CAPACITY); - self.html.shrink_to_fit(); - self.line_offsets.shrink_to_fit(); - self.html.clear(); - self.line_offsets.clear(); + shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY); + shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY); self.line_offsets.push(0); } @@ -1069,3 +1065,11 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index c2bec6ca3a..b0786580ff 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -119,12 +119,8 @@ pub extern "C" fn ts_tagger_tag( let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; if let Some(config) = tagger.languages.get(scope_name) { - buffer.tags.truncate(BUFFER_TAGS_RESERVE_CAPACITY); - buffer.docs.truncate(BUFFER_DOCS_RESERVE_CAPACITY); - buffer.tags.shrink_to_fit(); - buffer.docs.shrink_to_fit(); - buffer.tags.clear(); - buffer.docs.clear(); + shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY); + shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY); let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; let cancellation_flag = unsafe { cancellation_flag.as_ref() }; @@ -262,3 +258,11 @@ fn unwrap(result: Result) -> T { abort(); }) } + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} From 8d58a0d33a070af73dd6548d8000e0e7ddd04331 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 5 Aug 2020 13:10:02 -0400 Subject: [PATCH 141/282] Add parameter in the header. 
--- tags/include/tree_sitter/tags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tags/include/tree_sitter/tags.h b/tags/include/tree_sitter/tags.h index 773113d7d2..4784abbb15 100644 --- a/tags/include/tree_sitter/tags.h +++ b/tags/include/tree_sitter/tags.h @@ -89,7 +89,7 @@ uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); // Determine whether a parse error was encountered while tagging. -bool ts_tags_buffer_found_parse_error(); +bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*); #ifdef __cplusplus } From b2e4d3f54f33c0f6dcffa86af7e5ff932a65c251 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Sat, 15 Aug 2020 13:07:13 -0500 Subject: [PATCH 142/282] Document alternations and wildcard nodes These were added in https://github.com/tree-sitter/tree-sitter/pull/615 and https://github.com/tree-sitter/tree-sitter/pull/630 Closes https://github.com/tree-sitter/tree-sitter/issues/704 --- docs/section-2-using-parsers.md | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 5b807b9004..a9f5de0295 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -540,6 +540,51 @@ Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also ) ``` +#### Alternations + +An alternation is written as a pair of square brackets (`[]`) containing a list of alternative patterns. +This is similar to _character classes_ from regular expressions (`[abc]` matches either a, b, or c). + +For example, this pattern would match a call to either a variable or an object property. 
+In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`:
+
+```
+(call_expression
+  function: [
+    (identifier) @function
+    (member_expression
+      property: (property_identifier) @method)
+  ])
+```
+
+This pattern would match a set of possible keyword tokens, capturing them as `@keyword`:
+
+```
+[
+  "break"
+  "catch"
+  "delete"
+  "else"
+  "for"
+  "function"
+  "if"
+  "return"
+  "try"
+  "while"
+] @keyword
+```
+
+#### Wildcard Node
+
+A wildcard node is represented with an underscore (`(_)`), it matches any node.
+This is similar to `.` in regular expressions.
+
+For example, this pattern would match any node inside a call:
+
+```
+(call (_) @call.inner)
+```
+
 #### Predicates
 
 You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. 
From c3f9b2b377a789160a1ddb1eed5b215e226f5d31 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 17 Aug 2020 09:57:06 -0700 Subject: [PATCH 143/282] Fix query analysis bugs found in ruby tags query --- cli/src/tests/query_test.rs | 14 +++++++++++--- lib/src/query.c | 24 ++++++++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index c3a18d718a..aa5a6744b9 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2310,9 +2310,17 @@ fn test_query_is_definite() { ], }, Row { - language: get_language("javascript"), - pattern: r#""#, - results_by_symbol: &[], + language: get_language("ruby"), + pattern: r#" + (singleton_class + value: (constant) + "end") + "#, + results_by_symbol: &[ + ("singleton_class", false), + ("constant", false), + ("end", true), + ], }, ]; diff --git a/lib/src/query.c b/lib/src/query.c index 15aa2fd1af..52f46918de 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -14,7 +14,7 @@ #define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_ANALYSIS_STATE_DEPTH 4 +#define MAX_ANALYSIS_STATE_DEPTH 8 /* * Stream - A sequence of unicode characters derived from a UTF8 string. 
@@ -804,7 +804,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index } for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { TSStateId next_state = ts_language_next_state(self->language, state, sym); - if (next_state != 0) { + if (next_state != 0 && next_state != state) { state_predecessor_map_add(&predecessor_map, next_state, state); TSSymbol symbol = self->language->public_symbol_map[sym]; array_search_sorted_by( @@ -873,7 +873,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); for (unsigned j = 0; j < subgraph->nodes.size; j++) { AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; - printf(" {state: %u, child_index: %u, production_id: %u}\n", node->state, node->child_index, node->production_id); + printf( + " {state: %u, child_index: %u, production_id: %u, done: %d}\n", + node->state, node->child_index, node->production_id, node->done + ); } printf("\n"); } @@ -924,23 +927,24 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index #ifdef DEBUG_ANALYZE_QUERY printf("Final step indices:"); for (unsigned j = 0; j < final_step_indices.size; j++) { - printf(" %u", final_step_indices.contents[j]); + printf(" %4u", final_step_indices.contents[j]); } printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); for (unsigned j = 0; j < states.size; j++) { AnalysisState *state = &states.contents[j]; - printf(" %3u: {step: %u, stack: [", j, state->step_index); + printf(" %3u: step: %u, stack: [", j, state->step_index); for (unsigned k = 0; k < state->depth; k++) { printf( - " {parent: %s, child_index: %u, field: %s, state: %3u, done:%d}", + " {%s, child: %u, state: %4u", self->language->symbol_names[state->stack[k].parent_symbol], state->stack[k].child_index, - state->stack[k].field_id ? 
self->language->field_names[state->stack[k].field_id] : "", - state->stack[k].parse_state, - state->stack[k].done + state->stack[k].parse_state ); + if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); + if (state->stack[k].done) printf(", DONE"); + printf("}"); } - printf(" ]}\n"); + printf(" ]\n"); } #endif From 228a9e28e1c19f12a6ca60ea85fab2b5c6c101ab Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 17 Aug 2020 13:27:17 -0700 Subject: [PATCH 144/282] Add tests for impossible queries --- cli/src/tests/query_test.rs | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index aa5a6744b9..15c64afa10 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -186,21 +186,40 @@ fn test_query_errors_on_invalid_conditions() { #[test] fn test_query_errors_on_impossible_patterns() { - allocations::record(|| { - let language = get_language("javascript"); + let js_lang = get_language("javascript"); + let rb_lang = get_language("ruby"); + allocations::record(|| { assert_eq!( Query::new( - language, - "(binary_expression left:(identifier) left:(identifier))" + js_lang, + "(binary_expression left: (identifier) left: (identifier))" ), Err(QueryError::Pattern( 1, - [ - "(binary_expression left:(identifier) left:(identifier))", // - "^" - ] - .join("\n") + "(binary_expression left: (identifier) left: (identifier))\n^".to_string(), + )) + ); + + Query::new( + js_lang, + "(function_declaration name: (identifier) (statement_block))", + ) + .unwrap(); + assert_eq!( + Query::new(js_lang, "(function_declaration name: (statement_block))"), + Err(QueryError::Pattern( + 1, + "(function_declaration name: (statement_block))\n^".to_string(), + )) + ); + + Query::new(rb_lang, "(call receiver:(call))").unwrap(); + assert_eq!( + Query::new(rb_lang, "(call receiver:(binary))"), + Err(QueryError::Pattern( + 1, 
+ "(call receiver:(binary))\n^".to_string(), )) ); }); From 91fc9f5399e4513efb87c1981ff31f9fd1e2e6ec Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 17 Aug 2020 16:50:59 -0700 Subject: [PATCH 145/282] Use is_definite flag in ts_query_cursor_next_capture --- cli/src/tests/query_test.rs | 48 +++++++++ lib/src/query.c | 195 ++++++++++++++++++++---------------- 2 files changed, 157 insertions(+), 86 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 15c64afa10..1df87c74e3 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1952,6 +1952,54 @@ fn test_query_captures_with_too_many_nested_results() { }); } +#[test] +fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + "[" @l-bracket + "]" @r-bracket) + + "." @dot + "#, + ) + .unwrap(); + + // The '[' node must be returned before all of the '.' nodes, + // even though its pattern does not finish until the ']' node + // at the end of the document. But because the '[' is definite, + // it can be returned before the pattern finishes matching. 
+ let source = " + [ + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + ] + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + [("l-bracket", "[")] + .iter() + .chain([("dot", "."); 40].iter()) + .chain([("r-bracket", "]")].iter()) + .cloned() + .collect::>(), + ); + }); +} + #[test] fn test_query_captures_ordered_by_both_start_and_end_positions() { allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 52f46918de..a7bc9b81ee 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2034,7 +2034,8 @@ static bool ts_query_cursor__first_in_progress_capture( TSQueryCursor *self, uint32_t *state_index, uint32_t *byte_offset, - uint32_t *pattern_index + uint32_t *pattern_index, + bool *is_definite ) { bool result = false; *state_index = UINT32_MAX; @@ -2047,13 +2048,20 @@ static bool ts_query_cursor__first_in_progress_capture( &self->capture_list_pool, state->capture_list_id ); - if (captures->size > 0) { - uint32_t capture_byte = ts_node_start_byte(captures->contents[0].node); + if (captures->size > state->consumed_capture_count) { + uint32_t capture_byte = ts_node_start_byte(captures->contents[state->consumed_capture_count].node); if ( !result || capture_byte < *byte_offset || (capture_byte == *byte_offset && state->pattern_index < *pattern_index) ) { + QueryStep *step = &self->query->steps.contents[state->step_index]; + if (is_definite) { + *is_definite = step->is_definite; + } else if (step->is_definite) { + continue; + } + result = true; *state_index = i; *byte_offset = capture_byte; @@ -2216,7 +2224,8 @@ static CaptureList *ts_query_cursor__prepare_to_capture( self, &state_index, &byte_offset, - &pattern_index + &pattern_index, + 
NULL ) && state_index != state_index_to_preserve ) { @@ -2275,7 +2284,10 @@ static QueryState *ts_query_cursor__copy_state( // If one or more patterns finish, return `true` and store their states in the // `finished_states` array. Multiple patterns can finish on the same node. If // there are no more matches, return `false`. -static inline bool ts_query_cursor__advance(TSQueryCursor *self) { +static inline bool ts_query_cursor__advance( + TSQueryCursor *self, + bool stop_on_definite_step +) { bool did_match = false; for (;;) { if (self->halted) { @@ -2290,6 +2302,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { if (did_match || self->halted) return did_match; + // Exit the current node. if (self->ascending) { LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); @@ -2342,7 +2355,10 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { } } self->states.size -= deleted_count; - } else { + } + + // Enter a new node. + else { // If this node is before the selected range, then avoid descending into it. TSNode node = ts_tree_cursor_current_node(&self->cursor); if ( @@ -2516,6 +2532,9 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { state->step_index ); + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if (stop_on_definite_step && next_step->is_definite) did_match = true; + // If this state's next step has an alternative step, then copy the state in order // to pursue both alternatives. The alternative step itself may have an alternative, // so this is an interative process. 
@@ -2660,7 +2679,7 @@ bool ts_query_cursor_next_match( TSQueryMatch *match ) { if (self->finished_states.size == 0) { - if (!ts_query_cursor__advance(self)) { + if (!ts_query_cursor__advance(self, false)) { return false; } } @@ -2701,99 +2720,103 @@ bool ts_query_cursor_next_capture( TSQueryMatch *match, uint32_t *capture_index ) { + // The goal here is to return captures in order, even though they may not + // be discovered in order, because patterns can overlap. Search for matches + // until there is a finished capture that is before any unfinished capture. for (;;) { - // The goal here is to return captures in order, even though they may not - // be discovered in order, because patterns can overlap. If there are any - // finished patterns, then try to find one that contains a capture that - // is *definitely* before any capture in an *unfinished* pattern. - if (self->finished_states.size > 0) { - // First, identify the position of the earliest capture in an unfinished - // match. For a finished capture to be returned, it must be *before* - // this position. - uint32_t first_unfinished_capture_byte; - uint32_t first_unfinished_pattern_index; - uint32_t first_unfinished_state_index; - ts_query_cursor__first_in_progress_capture( - self, - &first_unfinished_state_index, - &first_unfinished_capture_byte, - &first_unfinished_pattern_index - ); + // First, find the earliest capture in an unfinished match. + uint32_t first_unfinished_capture_byte; + uint32_t first_unfinished_pattern_index; + uint32_t first_unfinished_state_index; + bool first_unfinished_state_is_definite = false; + ts_query_cursor__first_in_progress_capture( + self, + &first_unfinished_state_index, + &first_unfinished_capture_byte, + &first_unfinished_pattern_index, + &first_unfinished_state_is_definite + ); - // Find the earliest capture in a finished match. 
- int first_finished_state_index = -1; - uint32_t first_finished_capture_byte = first_unfinished_capture_byte; - uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size; i++) { - const QueryState *state = &self->finished_states.contents[i]; - const CaptureList *captures = capture_list_pool_get( - &self->capture_list_pool, - state->capture_list_id + // Then find the earliest capture in a finished match. It must occur + // before the first capture in an *unfinished* match. + QueryState *first_finished_state = NULL; + uint32_t first_finished_capture_byte = first_unfinished_capture_byte; + uint32_t first_finished_pattern_index = first_unfinished_pattern_index; + for (unsigned i = 0; i < self->finished_states.size; i++) { + QueryState *state = &self->finished_states.contents[i]; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + if (captures->size > state->consumed_capture_count) { + uint32_t capture_byte = ts_node_start_byte( + captures->contents[state->consumed_capture_count].node ); - if (captures->size > state->consumed_capture_count) { - uint32_t capture_byte = ts_node_start_byte( - captures->contents[state->consumed_capture_count].node - ); - if ( - capture_byte < first_finished_capture_byte || - ( - capture_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state_index = i; - first_finished_capture_byte = capture_byte; - first_finished_pattern_index = state->pattern_index; - } - } else { - capture_list_pool_release( - &self->capture_list_pool, - state->capture_list_id - ); - array_erase(&self->finished_states, i); - i--; + if ( + capture_byte < first_finished_capture_byte || + ( + capture_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index + ) + ) { + first_finished_state = state; + first_finished_capture_byte = 
capture_byte; + first_finished_pattern_index = state->pattern_index; } - } - - // If there is finished capture that is clearly before any unfinished - // capture, then return its match, and its capture index. Internally - // record the fact that the capture has been 'consumed'. - if (first_finished_state_index != -1) { - QueryState *state = &self->finished_states.contents[ - first_finished_state_index - ]; - match->id = state->id; - match->pattern_index = state->pattern_index; - const CaptureList *captures = capture_list_pool_get( + } else { + capture_list_pool_release( &self->capture_list_pool, state->capture_list_id ); - match->captures = captures->contents; - match->capture_count = captures->size; - *capture_index = state->consumed_capture_count; - state->consumed_capture_count++; - return true; + array_erase(&self->finished_states, i); + i--; } + } - if (capture_list_pool_is_empty(&self->capture_list_pool)) { - LOG( - " abandon state. index:%u, pattern:%u, offset:%u.\n", - first_unfinished_state_index, - first_unfinished_pattern_index, - first_unfinished_capture_byte - ); - capture_list_pool_release( - &self->capture_list_pool, - self->states.contents[first_unfinished_state_index].capture_list_id - ); - array_erase(&self->states, first_unfinished_state_index); - } + // If there is finished capture that is clearly before any unfinished + // capture, then return its match, and its capture index. Internally + // record the fact that the capture has been 'consumed'. 
+ QueryState *state; + if (first_finished_state) { + state = first_finished_state; + } else if (first_unfinished_state_is_definite) { + state = &self->states.contents[first_unfinished_state_index]; + } else { + state = NULL; + } + + if (state) { + match->id = state->id; + match->pattern_index = state->pattern_index; + const CaptureList *captures = capture_list_pool_get( + &self->capture_list_pool, + state->capture_list_id + ); + match->captures = captures->contents; + match->capture_count = captures->size; + *capture_index = state->consumed_capture_count; + state->consumed_capture_count++; + return true; + } + + if (capture_list_pool_is_empty(&self->capture_list_pool)) { + LOG( + " abandon state. index:%u, pattern:%u, offset:%u.\n", + first_unfinished_state_index, + first_unfinished_pattern_index, + first_unfinished_capture_byte + ); + capture_list_pool_release( + &self->capture_list_pool, + self->states.contents[first_unfinished_state_index].capture_list_id + ); + array_erase(&self->states, first_unfinished_state_index); } // If there are no finished matches that are ready to be returned, then // continue finding more matches. 
if ( - !ts_query_cursor__advance(self) && + !ts_query_cursor__advance(self, true) && self->finished_states.size == 0 ) return false; } From 604f9e8148de6debdaf010978e994de93b18b0f0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 18 Aug 2020 10:10:32 -0700 Subject: [PATCH 146/282] query: Assign is_definite correctly for steps within nested sub-patterns --- cli/src/tests/query_test.rs | 25 +++++++++++++++++++++++++ lib/src/query.c | 33 +++++++++++++-------------------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1df87c74e3..5a2fbdc9cb 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2389,6 +2389,31 @@ fn test_query_is_definite() { ("end", true), ], }, + Row { + language: get_language("javascript"), + pattern: r#" + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + results_by_symbol: &[("property_identifier", false), ("template_string", false)], + }, + Row { + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[") + "#, + results_by_symbol: &[ + ("identifier", false), + ("property_identifier", true), + ("[", true), + ], + }, ]; allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index a7bc9b81ee..416e96146c 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1117,16 +1117,20 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index uint32_t child_step_index = parent_step_index + 1; QueryStep *child_step = &self->steps.contents[child_step_index]; while (child_step->depth == parent_depth + 1) { + // Check if there is any way for the pattern to reach this step, but fail + // to reach the end of the sub-pattern. 
for (unsigned k = 0; k < final_step_indices.size; k++) { uint32_t final_step_index = final_step_indices.contents[k]; if ( final_step_index >= child_step_index && - self->steps.contents[final_step_index].depth != PATTERN_DONE_MARKER + self->steps.contents[final_step_index].depth == child_step->depth ) { child_step->is_definite = false; break; } } + + // Advance to the next child step in this sub-pattern. do { child_step_index++; child_step++; @@ -1136,6 +1140,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index ); } + // If this pattern cannot match, store the pattern index so that it can be + // returned to the caller. if (result && !can_finish_pattern) { unsigned exists; array_search_sorted_by( @@ -1150,27 +1156,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // In order for a step to be definite, all of its child steps must be definite, // and all of its later sibling steps must be definite. Propagate any indefiniteness // upward and backward through the pattern trees. 
+ bool all_later_children_definite = true; for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) { QueryStep *step = &self->steps.contents[i]; - bool all_later_children_definite = true; - unsigned end_step_index = i + 1; - while (end_step_index < self->steps.size) { - QueryStep *child_step = &self->steps.contents[end_step_index]; - if (child_step->depth <= step->depth || child_step->depth == PATTERN_DONE_MARKER) break; - end_step_index++; - } - for (unsigned j = end_step_index - 1; j > i; j--) { - QueryStep *child_step = &self->steps.contents[j]; - if (child_step->depth == step->depth + 1) { - if (all_later_children_definite) { - if (!child_step->is_definite) { - all_later_children_definite = false; - step->is_definite = false; - } - } else { - child_step->is_definite = false; - } - } + if (step->depth == PATTERN_DONE_MARKER) { + all_later_children_definite = true; + } else { + if (!all_later_children_definite) step->is_definite = false; + if (!step->is_definite) all_later_children_definite = false; } } From bd42729a41181a71690e0b99d35346b51fa5c6a8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 18 Aug 2020 13:01:45 -0700 Subject: [PATCH 147/282] query: Avoid early-returning captures due to predicates --- cli/src/tests/query_test.rs | 21 +++++++++++- lib/src/query.c | 66 ++++++++++++++++++++++++++++--------- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5a2fbdc9cb..a18c3a8b86 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2414,6 +2414,22 @@ fn test_query_is_definite() { ("[", true), ], }, + Row { + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[" + (#match? 
@prop "foo")) + "#, + results_by_symbol: &[ + ("identifier", false), + ("property_identifier", false), + ("[", true), + ], + }, ]; allocations::record(|| { @@ -2431,7 +2447,10 @@ fn test_query_is_definite() { query.pattern_is_definite(0, symbol, 0), *is_definite, "Pattern: {:?}, symbol: {}, expected is_definite to be {}", - row.pattern, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), symbol_name, is_definite, ) diff --git a/lib/src/query.c b/lib/src/query.c index 416e96146c..b3bf0b480b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -91,9 +91,9 @@ typedef struct { } PatternEntry; typedef struct { + Slice steps; Slice predicate_steps; uint32_t start_byte; - uint32_t start_step; } QueryPattern; /* @@ -1146,7 +1146,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index unsigned exists; array_search_sorted_by( &self->patterns, 0, - .start_step, parent_step_index, + .steps.offset, parent_step_index, impossible_index, &exists ); result = false; @@ -1156,12 +1156,45 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // In order for a step to be definite, all of its child steps must be definite, // and all of its later sibling steps must be definite. Propagate any indefiniteness // upward and backward through the pattern trees. - bool all_later_children_definite = true; - for (unsigned i = self->steps.size - 1; i + 1 > 0; i--) { - QueryStep *step = &self->steps.contents[i]; - if (step->depth == PATTERN_DONE_MARKER) { - all_later_children_definite = true; - } else { + Array(uint16_t) predicate_capture_ids = array_new(); + for (unsigned i = 0; i < self->patterns.size; i++) { + QueryPattern *pattern = &self->patterns.contents[i]; + + // Gather all of the captures that are used in predicates for this pattern. 
+ array_clear(&predicate_capture_ids); + for ( + unsigned start = pattern->predicate_steps.offset, + end = start + pattern->predicate_steps.length, + j = start; j < end; j++ + ) { + TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; + if (step->type == TSQueryPredicateStepTypeCapture) { + array_insert_sorted_by(&predicate_capture_ids, 0, , step->value_id); + } + } + + bool all_later_children_definite = true; + for ( + unsigned start = pattern->steps.offset, + end = start + pattern->steps.length, + j = end - 1; j + 1 > start; j-- + ) { + QueryStep *step = &self->steps.contents[j]; + + // If this step has a capture that is used in a predicate, + // then it is not definite. + for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { + uint16_t capture_id = step->capture_ids[k]; + if (capture_id == NONE) break; + unsigned index, exists; + array_search_sorted_by(&predicate_capture_ids, 0, , capture_id, &index, &exists); + if (exists) { + step->is_definite = false; + break; + } + } + + // If a step is not definite, then none of its predecessors can be definite. 
if (!all_later_children_definite) step->is_definite = false; if (!step->is_definite) all_later_children_definite = false; } @@ -1197,6 +1230,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index array_delete(&next_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); + array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); return result; @@ -1238,7 +1272,6 @@ static TSQueryError ts_query__parse_predicate( predicate_name, length ); - array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -1249,7 +1282,6 @@ static TSQueryError ts_query__parse_predicate( if (stream->next == ')') { stream_advance(stream); stream_skip_whitespace(stream); - array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeDone, .value_id = 0, @@ -1278,7 +1310,6 @@ static TSQueryError ts_query__parse_predicate( return TSQueryErrorCapture; } - array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeCapture, .value_id = capture_id, @@ -1318,7 +1349,6 @@ static TSQueryError ts_query__parse_predicate( string_content, length ); - array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -1338,7 +1368,6 @@ static TSQueryError ts_query__parse_predicate( symbol_start, length ); - array_back(&self->patterns)->predicate_steps.length++; array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, @@ -1778,14 +1807,19 @@ TSQuery *ts_query_new( while (stream.input < stream.end) { uint32_t pattern_index = self->patterns.size; uint32_t start_step_index = 
self->steps.size; + uint32_t start_predicate_step_index = self->predicate_steps.size; array_push(&self->patterns, ((QueryPattern) { - .predicate_steps = (Slice) {.offset = self->predicate_steps.size, .length = 0}, + .steps = (Slice) {.offset = start_step_index}, + .predicate_steps = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream.input - source, - .start_step = self->steps.size, })); *error_type = ts_query__parse_pattern(self, &stream, 0, false); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); + QueryPattern *pattern = array_back(&self->patterns); + pattern->steps.length = self->steps.size - start_step_index; + pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; + // If any pattern could not be parsed, then report the error information // and terminate. if (*error_type) { @@ -1903,7 +1937,7 @@ bool ts_query_pattern_is_definite( TSSymbol symbol, uint32_t index ) { - uint32_t step_index = self->patterns.contents[pattern_index].start_step; + uint32_t step_index = self->patterns.contents[pattern_index].steps.offset; QueryStep *step = &self->steps.contents[step_index]; for (; step->depth != PATTERN_DONE_MARKER; step++) { bool does_match = symbol ? 
From aac75e35b1e4c519158f26fe048699d127b1ed10 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 19 Aug 2020 13:15:45 -0700 Subject: [PATCH 148/282] Optimize iteration over state successors during query analysis --- lib/src/language.h | 106 ++++++++++++++++- lib/src/query.c | 290 ++++++++++++++++++++++++--------------------- 2 files changed, 258 insertions(+), 138 deletions(-) diff --git a/lib/src/language.h b/lib/src/language.h index 288c2a2b09..f8fd1ae5c0 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -20,6 +20,22 @@ typedef struct { bool is_reusable; } TableEntry; +typedef struct { + const TSLanguage *language; + const uint16_t *data; + const uint16_t *group_end; + TSStateId state; + uint16_t table_value; + uint16_t section_index; + uint16_t group_count; + bool is_small_state; + + const TSParseAction *actions; + TSSymbol symbol; + TSStateId next_state; + uint16_t action_count; +} LookaheadIterator; + void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *); TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); @@ -62,6 +78,13 @@ static inline bool ts_language_has_reduce_action( return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; } +// Lookup the table value for a given symbol and state. +// +// For non-terminal symbols, the table value represents a successor state. +// For terminal symbols, it represents an index in the actions table. +// For 'large' parse states, this is a direct lookup. For 'small' parse +// states, this requires searching through the symbol groups to find +// the given symbol. 
static inline uint16_t ts_language_lookup( const TSLanguage *self, TSStateId state, @@ -73,8 +96,8 @@ static inline uint16_t ts_language_lookup( ) { uint32_t index = self->small_parse_table_map[state - self->large_state_count]; const uint16_t *data = &self->small_parse_table[index]; - uint16_t section_count = *(data++); - for (unsigned i = 0; i < section_count; i++) { + uint16_t group_count = *(data++); + for (unsigned i = 0; i < group_count; i++) { uint16_t section_value = *(data++); uint16_t symbol_count = *(data++); for (unsigned i = 0; i < symbol_count; i++) { @@ -87,6 +110,85 @@ static inline uint16_t ts_language_lookup( } } +// Iterate over all of the symbols that are valid in the given state. +// +// For 'large' parse states, this just requires iterating through +// all possible symbols and checking the parse table for each one. +// For 'small' parse states, this exploits the structure of the +// table to only visit the valid symbols. +static inline LookaheadIterator ts_language_lookaheads( + const TSLanguage *self, + TSStateId state +) { + bool is_small_state = + self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && + state >= self->large_state_count; + const uint16_t *data; + const uint16_t *group_end = NULL; + uint16_t group_count = 0; + if (is_small_state) { + uint32_t index = self->small_parse_table_map[state - self->large_state_count]; + data = &self->small_parse_table[index]; + group_end = data + 1; + group_count = *data; + } else { + data = &self->parse_table[state * self->symbol_count] - 1; + } + return (LookaheadIterator) { + .language = self, + .data = data, + .group_end = group_end, + .group_count = group_count, + .is_small_state = is_small_state, + .symbol = UINT16_MAX, + .next_state = 0, + }; +} + +static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) { + // For small parse states, valid symbols are listed explicitly, + // grouped by their value. 
There's no need to look up the actions + // again until moving to the next group. + if (self->is_small_state) { + self->data++; + if (self->data == self->group_end) { + if (self->group_count == 0) return false; + self->group_count--; + self->table_value = *(self->data++); + unsigned symbol_count = *(self->data++); + self->group_end = self->data + symbol_count; + self->symbol = *self->data; + } else { + self->symbol = *self->data; + return true; + } + } + + // For large parse states, iterate through every symbol until one + // is found that has valid actions. + else { + do { + self->data++; + self->symbol++; + if (self->symbol >= self->language->symbol_count) return false; + self->table_value = *self->data; + } while (!self->table_value); + } + + // Depending on if the symbols is terminal or non-terminal, the table value either + // represents a list of actions or a successor state. + if (self->symbol < self->language->token_count) { + const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; + self->action_count = entry->entry.count; + self->actions = (const TSParseAction *)(entry + 1); + self->next_state = 0; + } else { + self->action_count = 0; + self->next_state = self->table_value; + } + return true; +} + static inline TSStateId ts_language_next_state( const TSLanguage *self, TSStateId state, diff --git a/lib/src/query.c b/lib/src/query.c index b3bf0b480b..eba5955f9b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -599,7 +599,7 @@ static inline int analysis_state__compare( if (self->stack[i].parse_state > other->stack[i].parse_state) return 1; if (self->stack[i].field_id < other->stack[i].field_id) return -1; if (self->stack[i].field_id > other->stack[i].field_id) return 1; - } + } if (self->step_index < other->step_index) return -1; if (self->step_index > other->step_index) return 1; return 0; @@ -769,47 +769,44 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // 3) A list of predecessor 
states for each state. StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); for (TSStateId state = 1; state < self->language->state_count; state++) { - unsigned subgraph_index = 0, exists; - for (TSSymbol sym = 0; sym < self->language->token_count; sym++) { - unsigned count; - const TSParseAction *actions = ts_language_actions(self->language, state, sym, &count); - for (unsigned i = 0; i < count; i++) { - const TSParseAction *action = &actions[i]; - if (action->type == TSParseActionTypeReduce) { - TSSymbol symbol = self->language->public_symbol_map[action->params.reduce.symbol]; - array_search_sorted_by( - &subgraphs, - 0, - .symbol, - symbol, - &subgraph_index, - &exists - ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { - .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, - .done = true, - })); + unsigned subgraph_index, exists; + LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); + while (ts_lookahead_iterator_next(&lookahead_iterator)) { + if (lookahead_iterator.action_count) { + for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { + const TSParseAction *action = &lookahead_iterator.actions[i]; + if (action->type == TSParseActionTypeReduce) { + TSSymbol symbol = self->language->public_symbol_map[action->params.reduce.symbol]; + array_search_sorted_by( + &subgraphs, + 0, + .symbol, + symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { + array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { + .state = state, + .production_id = action->params.reduce.production_id, + .child_index = 
action->params.reduce.child_count, + .done = true, + })); + } } + } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { + TSStateId next_state = action->params.shift.state; + state_predecessor_map_add(&predecessor_map, next_state, state); } - } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - TSStateId next_state = action->params.shift.state; - state_predecessor_map_add(&predecessor_map, next_state, state); } - } - } - for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { - TSStateId next_state = ts_language_next_state(self->language, state, sym); - if (next_state != 0 && next_state != state) { - state_predecessor_map_add(&predecessor_map, next_state, state); - TSSymbol symbol = self->language->public_symbol_map[sym]; + } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != state) { + state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + TSSymbol symbol = self->language->public_symbol_map[lookahead_iterator.symbol]; array_search_sorted_by( &subgraphs, - subgraph_index, + 0, .symbol, symbol, &subgraph_index, @@ -871,6 +868,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index for (unsigned i = 0; i < subgraphs.size; i++) { AnalysisSubgraph *subgraph = &subgraphs.contents[i]; printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); + for (unsigned j = 0; j < subgraph->start_states.size; j++) { + printf( + " {state: %u}\n", + subgraph->start_states.contents[j] + ); + } for (unsigned j = 0; j < subgraph->nodes.size; j++) { AnalysisSubgraphNode *node = &subgraph->nodes.contents[j]; printf( @@ -985,121 +988,136 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // Follow every possible path in the parse table, but only visit states that // are part of the subgraph for the current symbol. 
- for (TSSymbol sym = 0; sym < self->language->symbol_count; sym++) { + LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); + while (ts_lookahead_iterator_next(&lookahead_iterator)) { + TSSymbol sym = lookahead_iterator.symbol; + + TSStateId next_parse_state; + if (lookahead_iterator.action_count) { + const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; + if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { + next_parse_state = action->params.shift.state; + } else { + continue; + } + } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != parse_state) { + next_parse_state = lookahead_iterator.next_state; + } else { + continue; + } + AnalysisSubgraphNode successor = { - .state = ts_language_next_state(self->language, parse_state, sym), + .state = next_parse_state, .child_index = child_index + 1, }; - if (successor.state && successor.state != parse_state) { - unsigned node_index; - array_search_sorted_with( - &subgraph->nodes, 0, - analysis_subgraph_node__compare, &successor, - &node_index, &exists - ); - while (node_index < subgraph->nodes.size) { - AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; - if (node->state != successor.state || node->child_index != successor.child_index) break; - - // Use the subgraph to determine what alias and field will eventually be applied - // to this child node. - TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); - TSSymbol visible_symbol = alias - ? alias - : self->language->symbol_metadata[sym].visible - ? 
self->language->public_symbol_map[sym] - : 0; - TSFieldId field_id = parent_field_id; - if (!field_id) { - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); - for (; field_map != field_map_end; field_map++) { - if (field_map->child_index == child_index) { - field_id = field_map->field_id; - break; - } + unsigned node_index; + array_search_sorted_with( + &subgraph->nodes, 0, + analysis_subgraph_node__compare, &successor, + &node_index, &exists + ); + while (node_index < subgraph->nodes.size) { + AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++]; + if (node->state != successor.state || node->child_index != successor.child_index) break; + + // Use the subgraph to determine what alias and field will eventually be applied + // to this child node. + TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); + TSSymbol visible_symbol = alias + ? alias + : self->language->symbol_metadata[sym].visible + ? self->language->public_symbol_map[sym] + : 0; + TSFieldId field_id = parent_field_id; + if (!field_id) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); + for (; field_map != field_map_end; field_map++) { + if (field_map->child_index == child_index) { + field_id = field_map->field_id; + break; } } + } - AnalysisState next_state = *state; - analysis_state__top(&next_state)->child_index++; - analysis_state__top(&next_state)->parse_state = successor.state; - if (node->done) analysis_state__top(&next_state)->done = true; - - // Determine if this hypothetical child node would match the current step - // of the query pattern. 
- bool does_match = false; - if (visible_symbol) { - does_match = true; - if (step->symbol == NAMED_WILDCARD_SYMBOL) { - if (!self->language->symbol_metadata[visible_symbol].named) does_match = false; - } else if (step->symbol != WILDCARD_SYMBOL) { - if (step->symbol != visible_symbol) does_match = false; - } - if (step->field && step->field != field_id) { - does_match = false; - } + AnalysisState next_state = *state; + analysis_state__top(&next_state)->child_index++; + analysis_state__top(&next_state)->parse_state = successor.state; + if (node->done) analysis_state__top(&next_state)->done = true; + + // Determine if this hypothetical child node would match the current step + // of the query pattern. + bool does_match = false; + if (visible_symbol) { + does_match = true; + if (step->symbol == NAMED_WILDCARD_SYMBOL) { + if (!self->language->symbol_metadata[visible_symbol].named) does_match = false; + } else if (step->symbol != WILDCARD_SYMBOL) { + if (step->symbol != visible_symbol) does_match = false; } - - // If this is a hidden child, then push a new entry to the stack, in order to - // walk through the children of this child. - else if (sym >= self->language->token_count && next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { - next_state.depth++; - analysis_state__top(&next_state)->parse_state = parse_state; - analysis_state__top(&next_state)->child_index = 0; - analysis_state__top(&next_state)->parent_symbol = sym; - analysis_state__top(&next_state)->field_id = field_id; - analysis_state__top(&next_state)->done = false; - } else { - continue; + if (step->field && step->field != field_id) { + does_match = false; } + } - // Pop from the stack when this state reached the end of its current syntax node. - while (next_state.depth > 0 && analysis_state__top(&next_state)->done) { - next_state.depth--; - } + // If this is a hidden child, then push a new entry to the stack, in order to + // walk through the children of this child. 
+ else if (sym >= self->language->token_count && next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { + next_state.depth++; + analysis_state__top(&next_state)->parse_state = parse_state; + analysis_state__top(&next_state)->child_index = 0; + analysis_state__top(&next_state)->parent_symbol = sym; + analysis_state__top(&next_state)->field_id = field_id; + analysis_state__top(&next_state)->done = false; + } else { + continue; + } - // If this hypothetical child did match the current step of the query pattern, - // then advance to the next step at the current depth. This involves skipping - // over any descendant steps of the current child. - const QueryStep *next_step = step; - if (does_match) { - for (;;) { - next_state.step_index++; - next_step = &self->steps.contents[next_state.step_index]; - if ( - next_step->depth == PATTERN_DONE_MARKER || - next_step->depth <= parent_depth + 1 - ) break; - } - } + // Pop from the stack when this state reached the end of its current syntax node. + while (next_state.depth > 0 && analysis_state__top(&next_state)->done) { + next_state.depth--; + } + // If this hypothetical child did match the current step of the query pattern, + // then advance to the next step at the current depth. This involves skipping + // over any descendant steps of the current child. + const QueryStep *next_step = step; + if (does_match) { for (;;) { - // If this state can make further progress, then add it to the states for the next iteration. - // Otherwise, record the fact that matching can fail at this step of the pattern. 
- if (!next_step->is_dead_end) { - bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; - if (did_finish_pattern) can_finish_pattern = true; - if (next_state.depth > 0 && !did_finish_pattern) { - array_insert_sorted_with(&next_states, 0, analysis_state__compare, next_state); - } else { - array_insert_sorted_by(&final_step_indices, 0, , next_state.step_index); - } - } - - // If the state has advanced to a step with an alternative step, then add another state at - // that alternative step to the next iteration. + next_state.step_index++; + next_step = &self->steps.contents[next_state.step_index]; if ( - does_match && - next_step->alternative_index != NONE && - next_step->alternative_index > next_state.step_index - ) { - next_state.step_index = next_step->alternative_index; - next_step = &self->steps.contents[next_state.step_index]; + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= parent_depth + 1 + ) break; + } + } + + for (;;) { + // If this state can make further progress, then add it to the states for the next iteration. + // Otherwise, record the fact that matching can fail at this step of the pattern. + if (!next_step->is_dead_end) { + bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; + if (did_finish_pattern) can_finish_pattern = true; + if (next_state.depth > 0 && !did_finish_pattern) { + array_insert_sorted_with(&next_states, 0, analysis_state__compare, next_state); } else { - break; + array_insert_sorted_by(&final_step_indices, 0, , next_state.step_index); } } + + // If the state has advanced to a step with an alternative step, then add another state at + // that alternative step to the next iteration. 
+ if ( + does_match && + next_step->alternative_index != NONE && + next_step->alternative_index > next_state.step_index + ) { + next_state.step_index = next_step->alternative_index; + next_step = &self->steps.contents[next_state.step_index]; + } else { + break; + } } } } From d47346abc076410a531876eb5635d8230c69b72f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Aug 2020 10:07:22 -0700 Subject: [PATCH 149/282] Avoid pushing duplicate start states in query analysis --- lib/src/query.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/src/query.c b/lib/src/query.c index eba5955f9b..a156beb959 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -599,7 +599,7 @@ static inline int analysis_state__compare( if (self->stack[i].parse_state > other->stack[i].parse_state) return 1; if (self->stack[i].field_id < other->stack[i].field_id) return -1; if (self->stack[i].field_id > other->stack[i].field_id) return 1; - } + } if (self->step_index < other->step_index) return -1; if (self->step_index > other->step_index) return 1; return 0; @@ -814,6 +814,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index ); if (exists) { AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) array_push(&subgraph->start_states, state); } } From 4301110c126b8fabe45a00b20ce965d4043910d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Aug 2020 13:06:38 -0700 Subject: [PATCH 150/282] query: Indicate specific step that's impossible --- cli/src/tests/query_test.rs | 69 ++++++++++++----------- lib/binding_rust/bindings.rs | 7 +-- lib/binding_rust/lib.rs | 12 ++-- lib/include/tree_sitter/api.h | 6 +- lib/src/query.c | 100 ++++++++++++++++++++-------------- 5 files changed, 103 insertions(+), 91 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index a18c3a8b86..1e4ea8cc08 100644 --- 
a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -197,7 +197,11 @@ fn test_query_errors_on_impossible_patterns() { ), Err(QueryError::Pattern( 1, - "(binary_expression left: (identifier) left: (identifier))\n^".to_string(), + [ + "(binary_expression left: (identifier) left: (identifier))", + " ^" + ] + .join("\n"), )) ); @@ -210,7 +214,11 @@ fn test_query_errors_on_impossible_patterns() { Query::new(js_lang, "(function_declaration name: (statement_block))"), Err(QueryError::Pattern( 1, - "(function_declaration name: (statement_block))\n^".to_string(), + [ + "(function_declaration name: (statement_block))", + " ^", + ] + .join("\n") )) ); @@ -219,7 +227,11 @@ fn test_query_errors_on_impossible_patterns() { Query::new(rb_lang, "(call receiver:(binary))"), Err(QueryError::Pattern( 1, - "(call receiver:(binary))\n^".to_string(), + [ + "(call receiver:(binary))", // + " ^", + ] + .join("\n") )) ); }); @@ -2307,55 +2319,52 @@ fn test_query_alternative_predicate_prefix() { } #[test] -fn test_query_is_definite() { +fn test_query_step_is_definite() { struct Row { language: Language, pattern: &'static str, - results_by_symbol: &'static [(&'static str, bool)], + results_by_substring: &'static [(&'static str, bool)], } let rows = &[ Row { language: get_language("python"), pattern: r#"(expression_statement (string))"#, - results_by_symbol: &[("expression_statement", false), ("string", false)], + results_by_substring: &[("expression_statement", false), ("string", false)], }, Row { language: get_language("javascript"), pattern: r#"(expression_statement (string))"#, - results_by_symbol: &[ - ("expression_statement", false), - ("string", false), // string - ], + results_by_substring: &[("expression_statement", false), ("string", false)], }, Row { language: get_language("javascript"), pattern: r#"(object "{" "}")"#, - results_by_symbol: &[("object", false), ("{", true), ("}", true)], + results_by_substring: &[("object", false), ("{", true), ("}", true)], }, Row { 
language: get_language("javascript"), pattern: r#"(pair (property_identifier) ":")"#, - results_by_symbol: &[("pair", false), ("property_identifier", false), (":", true)], + results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], }, Row { language: get_language("javascript"), pattern: r#"(object "{" (_) "}")"#, - results_by_symbol: &[("object", false), ("{", false), ("", false), ("}", true)], + results_by_substring: &[("object", false), ("{", false), ("", false), ("}", true)], }, Row { language: get_language("javascript"), pattern: r#"(binary_expression left: (identifier) right: (_))"#, - results_by_symbol: &[ + results_by_substring: &[ ("binary_expression", false), - ("identifier", false), - ("", true), + ("(identifier)", false), + ("(_)", true), ], }, Row { language: get_language("javascript"), pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, - results_by_symbol: &[ + results_by_substring: &[ ("function_declaration", false), ("identifier", true), ("statement_block", true), @@ -2367,7 +2376,7 @@ fn test_query_is_definite() { (function_declaration name: (identifier) body: (statement_block "{" (expression_statement) "}"))"#, - results_by_symbol: &[ + results_by_substring: &[ ("function_declaration", false), ("identifier", false), ("statement_block", false), @@ -2383,7 +2392,7 @@ fn test_query_is_definite() { value: (constant) "end") "#, - results_by_symbol: &[ + results_by_substring: &[ ("singleton_class", false), ("constant", false), ("end", true), @@ -2397,7 +2406,7 @@ fn test_query_is_definite() { property: (property_identifier) @template-tag) arguments: (template_string)) @template-call "#, - results_by_symbol: &[("property_identifier", false), ("template_string", false)], + results_by_substring: &[("property_identifier", false), ("template_string", false)], }, Row { language: get_language("javascript"), @@ -2408,7 +2417,7 @@ fn test_query_is_definite() { property: (property_identifier) @prop) "[") 
"#, - results_by_symbol: &[ + results_by_substring: &[ ("identifier", false), ("property_identifier", true), ("[", true), @@ -2424,7 +2433,7 @@ fn test_query_is_definite() { "[" (#match? @prop "foo")) "#, - results_by_symbol: &[ + results_by_substring: &[ ("identifier", false), ("property_identifier", false), ("[", true), @@ -2435,23 +2444,17 @@ fn test_query_is_definite() { allocations::record(|| { for row in rows.iter() { let query = Query::new(row.language, row.pattern).unwrap(); - for (symbol_name, is_definite) in row.results_by_symbol { - let mut symbol = 0; - if !symbol_name.is_empty() { - symbol = row.language.id_for_node_kind(symbol_name, true); - if symbol == 0 { - symbol = row.language.id_for_node_kind(symbol_name, false); - } - } + for (substring, is_definite) in row.results_by_substring { + let offset = row.pattern.find(substring).unwrap(); assert_eq!( - query.pattern_is_definite(0, symbol, 0), + query.step_is_definite(offset), *is_definite, - "Pattern: {:?}, symbol: {}, expected is_definite to be {}", + "Pattern: {:?}, substring: {:?}, expected is_definite to be {}", row.pattern .split_ascii_whitespace() .collect::>() .join(" "), - symbol_name, + substring, is_definite, ) } diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index b5ff7a9e16..81cc6f9ad7 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -651,12 +651,7 @@ extern "C" { ) -> *const TSQueryPredicateStep; } extern "C" { - pub fn ts_query_pattern_is_definite( - self_: *const TSQuery, - pattern_index: u32, - symbol: TSSymbol, - step_index: u32, - ) -> bool; + pub fn ts_query_step_is_definite(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { #[doc = " Get the name and length of one of the query\'s captures, or one of the"] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index c601aeccf3..10cd9fc28c 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1467,12 +1467,12 @@ impl Query { unsafe { 
ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } } - /// Check if a pattern will definitely match after a certain number of steps - /// have matched. - pub fn pattern_is_definite(&self, pattern_index: usize, symbol: u16, step_index: usize) -> bool { - unsafe { - ffi::ts_query_pattern_is_definite(self.ptr.as_ptr(), pattern_index as u32, symbol, step_index as u32) - } + /// Check if a given step in a query is 'definite'. + /// + /// A query step is 'definite' if its parent pattern will be guaranteed to match + /// successfully once it reaches the step. + pub fn step_is_definite(&self, byte_offset: usize) -> bool { + unsafe { ffi::ts_query_step_is_definite(self.ptr.as_ptr(), byte_offset as u32) } } fn parse_property( diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 850cd31ea9..1e60e4b54f 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -719,11 +719,9 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( uint32_t *length ); -bool ts_query_pattern_is_definite( +bool ts_query_step_is_definite( const TSQuery *self, - uint32_t pattern_index, - TSSymbol symbol, - uint32_t step_index + uint32_t byte_offset ); /** diff --git a/lib/src/query.c b/lib/src/query.c index a156beb959..5a2bb2fb88 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -22,6 +22,7 @@ */ typedef struct { const char *input; + const char *start; const char *end; int32_t next; uint8_t next_size; @@ -96,6 +97,11 @@ typedef struct { uint32_t start_byte; } QueryPattern; +typedef struct { + uint32_t byte_offset; + uint16_t step_index; +} StepOffset; + /* * QueryState - The state of an in-progress match of a particular pattern * in a query. 
While executing, a `TSQueryCursor` must keep track of a number @@ -202,6 +208,7 @@ struct TSQuery { Array(PatternEntry) pattern_map; Array(TSQueryPredicateStep) predicate_steps; Array(QueryPattern) patterns; + Array(StepOffset) step_offsets; const TSLanguage *language; uint16_t wildcard_root_pattern_count; TSSymbol *symbol_map; @@ -268,21 +275,22 @@ static Stream stream_new(const char *string, uint32_t length) { Stream self = { .next = 0, .input = string, + .start = string, .end = string + length, }; stream_advance(&self); return self; } -static void stream_skip_whitespace(Stream *stream) { +static void stream_skip_whitespace(Stream *self) { for (;;) { - if (iswspace(stream->next)) { - stream_advance(stream); - } else if (stream->next == ';') { + if (iswspace(self->next)) { + stream_advance(self); + } else if (self->next == ';') { // skip over comments - stream_advance(stream); - while (stream->next && stream->next != '\n') { - if (!stream_advance(stream)) break; + stream_advance(self); + while (self->next && self->next != '\n') { + if (!stream_advance(self)) break; } } else { break; @@ -290,8 +298,8 @@ static void stream_skip_whitespace(Stream *stream) { } } -static bool stream_is_ident_start(Stream *stream) { - return iswalnum(stream->next) || stream->next == '_' || stream->next == '-'; +static bool stream_is_ident_start(Stream *self) { + return iswalnum(self->next) || self->next == '_' || self->next == '-'; } static void stream_scan_identifier(Stream *stream) { @@ -307,6 +315,10 @@ static void stream_scan_identifier(Stream *stream) { ); } +static uint32_t stream_offset(Stream *self) { + return self->input - self->start; +} + /****************** * CaptureListPool ******************/ @@ -716,7 +728,7 @@ static inline void ts_query__pattern_map_insert( // #define DEBUG_ANALYZE_QUERY -static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index) { +static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Identify all of 
the patterns in the query that have child patterns, both at the // top level and nested within other larger patterns. Record the step index where // each pattern starts. @@ -1165,12 +1177,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *impossible_index // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. if (result && !can_finish_pattern) { - unsigned exists; - array_search_sorted_by( - &self->patterns, 0, - .steps.offset, parent_step_index, - impossible_index, &exists - ); + assert(final_step_indices.size > 0); + uint16_t *impossible_step_index = array_back(&final_step_indices); + uint32_t i, exists; + array_search_sorted_by(&self->step_offsets, 0, .step_index, *impossible_step_index, &i, &exists); + assert(exists); + *error_offset = self->step_offsets.contents[i].byte_offset; result = false; } } @@ -1415,17 +1427,24 @@ static TSQueryError ts_query__parse_pattern( uint32_t depth, bool is_immediate ) { - const uint32_t starting_step_index = self->steps.size; - if (stream->next == 0) return TSQueryErrorSyntax; + if (stream->next == ')' || stream->next == ']') return PARENT_DONE; - // Finish the parent S-expression. - if (stream->next == ')' || stream->next == ']') { - return PARENT_DONE; + const uint32_t starting_step_index = self->steps.size; + + // Store the byte offset of each step in the query. + if ( + self->step_offsets.size == 0 || + array_back(&self->step_offsets)->step_index != starting_step_index + ) { + array_push(&self->step_offsets, ((StepOffset) { + .step_index = starting_step_index, + .byte_offset = stream_offset(stream), + })); } // An open bracket is the start of an alternation. 
- else if (stream->next == '[') { + if (stream->next == '[') { stream_advance(stream); stream_skip_whitespace(stream); @@ -1818,6 +1837,7 @@ TSQuery *ts_query_new( .predicate_values = symbol_table_new(), .predicate_steps = array_new(), .patterns = array_new(), + .step_offsets = array_new(), .symbol_map = symbol_map, .wildcard_root_pattern_count = 0, .language = language, @@ -1833,7 +1853,7 @@ TSQuery *ts_query_new( array_push(&self->patterns, ((QueryPattern) { .steps = (Slice) {.offset = start_step_index}, .predicate_steps = (Slice) {.offset = start_predicate_step_index}, - .start_byte = stream.input - source, + .start_byte = stream_offset(&stream), })); *error_type = ts_query__parse_pattern(self, &stream, 0, false); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); @@ -1846,7 +1866,7 @@ TSQuery *ts_query_new( // and terminate. if (*error_type) { if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; - *error_offset = stream.input - source; + *error_offset = stream_offset(&stream); ts_query_delete(self); return NULL; } @@ -1882,10 +1902,8 @@ TSQuery *ts_query_new( } if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { - unsigned impossible_pattern_index = 0; - if (!ts_query__analyze_patterns(self, &impossible_pattern_index)) { + if (!ts_query__analyze_patterns(self, error_offset)) { *error_type = TSQueryErrorPattern; - *error_offset = self->patterns.contents[impossible_pattern_index].start_byte; ts_query_delete(self); return NULL; } @@ -1901,6 +1919,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->pattern_map); array_delete(&self->predicate_steps); array_delete(&self->patterns); + array_delete(&self->step_offsets); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); ts_free(self->symbol_map); @@ -1953,24 +1972,21 @@ uint32_t ts_query_start_byte_for_pattern( return self->patterns.contents[pattern_index].start_byte; } -bool ts_query_pattern_is_definite( +bool 
ts_query_step_is_definite( const TSQuery *self, - uint32_t pattern_index, - TSSymbol symbol, - uint32_t index + uint32_t byte_offset ) { - uint32_t step_index = self->patterns.contents[pattern_index].steps.offset; - QueryStep *step = &self->steps.contents[step_index]; - for (; step->depth != PATTERN_DONE_MARKER; step++) { - bool does_match = symbol ? - step->symbol == symbol : - step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL; - if (does_match) { - if (index == 0) return step->is_definite; - index--; - } + uint32_t step_index = UINT32_MAX; + for (unsigned i = 0; i < self->step_offsets.size; i++) { + StepOffset *step_offset = &self->step_offsets.contents[i]; + if (step_offset->byte_offset >= byte_offset) break; + step_index = step_offset->step_index; + } + if (step_index < self->steps.size) { + return self->steps.contents[step_index].is_definite; + } else { + return false; } - return false; } void ts_query_disable_capture( From 9daec9cb22d6485acd776dd826a889e583eb74ad Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Aug 2020 13:24:42 -0700 Subject: [PATCH 151/282] Tweak impossible pattern error messages --- cli/src/error.rs | 2 +- cli/src/tests/query_test.rs | 6 +++--- docs/assets/js/playground.js | 2 +- lib/binding_rust/bindings.rs | 2 +- lib/binding_rust/lib.rs | 6 +++--- lib/binding_web/binding.js | 8 +++++--- lib/binding_web/exports.json | 3 --- lib/binding_web/test/query-test.js | 3 +++ lib/include/tree_sitter/api.h | 2 +- lib/src/query.c | 2 +- 10 files changed, 19 insertions(+), 17 deletions(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index c30e364796..075de3a6ab 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -70,7 +70,7 @@ impl<'a> From for Error { "Query error on line {}. Invalid syntax:\n{}", row, l )), - QueryError::Pattern(row, l) => Error::new(format!( + QueryError::Structure(row, l) => Error::new(format!( "Query error on line {}. 
Impossible pattern:\n{}", row, l )), diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1e4ea8cc08..e7231ef0c6 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -195,7 +195,7 @@ fn test_query_errors_on_impossible_patterns() { js_lang, "(binary_expression left: (identifier) left: (identifier))" ), - Err(QueryError::Pattern( + Err(QueryError::Structure( 1, [ "(binary_expression left: (identifier) left: (identifier))", @@ -212,7 +212,7 @@ fn test_query_errors_on_impossible_patterns() { .unwrap(); assert_eq!( Query::new(js_lang, "(function_declaration name: (statement_block))"), - Err(QueryError::Pattern( + Err(QueryError::Structure( 1, [ "(function_declaration name: (statement_block))", @@ -225,7 +225,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(rb_lang, "(call receiver:(call))").unwrap(); assert_eq!( Query::new(rb_lang, "(call receiver:(binary))"), - Err(QueryError::Pattern( + Err(QueryError::Structure( 1, [ "(call receiver:(binary))", // diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js index 686be90d3d..137bb352a6 100644 --- a/docs/assets/js/playground.js +++ b/docs/assets/js/playground.js @@ -277,7 +277,7 @@ let tree; const startPosition = queryEditor.posFromIndex(error.index); const endPosition = { line: startPosition.line, - ch: startPosition.ch + (error.length || 1) + ch: startPosition.ch + (error.length || Infinity) }; if (error.index === queryText.length) { diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 81cc6f9ad7..f28d346111 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -132,7 +132,7 @@ pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; -pub const TSQueryError_TSQueryErrorPattern: TSQueryError = 5; +pub 
const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; pub type TSQueryError = u32; extern "C" { #[doc = " Create a new parser."] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 10cd9fc28c..ea5893b426 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -163,7 +163,7 @@ pub enum QueryError { Field(usize, String), Capture(usize, String), Predicate(String), - Pattern(usize, String), + Structure(usize, String), } #[derive(Debug)] @@ -1206,8 +1206,8 @@ impl Query { "Unexpected EOF".to_string() }; match error_type { - ffi::TSQueryError_TSQueryErrorPattern => { - Err(QueryError::Pattern(row, message)) + ffi::TSQueryError_TSQueryErrorStructure => { + Err(QueryError::Structure(row, message)) } _ => Err(QueryError::Syntax(row, message)), } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 404beeb643..f731e8f868 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -667,8 +667,8 @@ class Language { const errorId = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); const errorByte = getValue(TRANSFER_BUFFER, 'i32'); const errorIndex = UTF8ToString(sourceAddress, errorByte).length; - const suffix = source.substr(errorIndex, 100); - const word = suffix.match(QUERY_WORD_REGEX)[0]; + const suffix = source.substr(errorIndex, 100).split('\n')[0]; + let word = suffix.match(QUERY_WORD_REGEX)[0]; let error; switch (errorId) { case 2: @@ -681,10 +681,12 @@ class Language { error = new RangeError(`Bad capture name @${word}`); break; case 5: - error = new SyntaxError(`Impossible pattern at offset ${errorIndex}: '${suffix}'...`); + error = new TypeError(`Bad pattern structure at offset ${errorIndex}: '${suffix}'...`); + word = ""; break; default: error = new SyntaxError(`Bad syntax at offset ${errorIndex}: '${suffix}'...`); + word = ""; break; } error.index = errorIndex; diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 2c63824900..7210515863 100644 --- 
a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -15,7 +15,6 @@ "__ZNSt3__212basic_stringIwNS_11char_traitsIwEENS_9allocatorIwEEED2Ev", "__ZdlPv", "__Znwm", - "___assert_fail", "_abort", "_iswalnum", "_iswalpha", @@ -73,8 +72,6 @@ "_ts_query_capture_count", "_ts_query_capture_name_for_id", "_ts_query_captures_wasm", - "_ts_query_context_delete", - "_ts_query_context_new", "_ts_query_delete", "_ts_query_matches_wasm", "_ts_query_new", diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 9d1e24e159..23663e9a4e 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -30,6 +30,9 @@ describe("Query", () => { assert.throws(() => { JavaScript.query("(function_declaration non_existent:(identifier))"); }, "Bad field name 'non_existent'"); + assert.throws(() => { + JavaScript.query("(function_declaration name:(statement_block))"); + }, "Bad pattern structure at offset 22: 'name:(statement_block))'"); }); it("throws an error on invalid predicates", () => { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1e60e4b54f..b85380d1e0 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -130,7 +130,7 @@ typedef enum { TSQueryErrorNodeType, TSQueryErrorField, TSQueryErrorCapture, - TSQueryErrorPattern, + TSQueryErrorStructure, } TSQueryError; /********************/ diff --git a/lib/src/query.c b/lib/src/query.c index 5a2bb2fb88..60c892d348 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1903,7 +1903,7 @@ TSQuery *ts_query_new( if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorPattern; + *error_type = TSQueryErrorStructure; ts_query_delete(self); return NULL; } From 456b1f6771de9ec689ea350eb4cbdfcf14baa283 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 Aug 2020 16:28:54 -0700 Subject: [PATCH 152/282] Fix 
handling of alternations and optional nodes in query analysis --- cli/src/tests/query_test.rs | 135 ++++++++++++++++++++++++++++++++++-- lib/src/query.c | 91 +++++++++++++----------- script/test | 12 ++-- 3 files changed, 188 insertions(+), 50 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index e7231ef0c6..816c3aee69 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,11 +1,17 @@ use super::helpers::allocations; use super::helpers::fixtures::get_language; +use lazy_static::lazy_static; +use std::env; use std::fmt::Write; use tree_sitter::{ Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, }; +lazy_static! { + static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); +} + #[test] fn test_query_errors_on_invalid_syntax() { allocations::record(|| { @@ -234,6 +240,34 @@ fn test_query_errors_on_impossible_patterns() { .join("\n") )) ); + + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (identifier)) + (generator_function_declaration (identifier)) + ]", + ) + .unwrap(); + assert_eq!( + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (object)) + (generator_function_declaration (identifier)) + ]", + ), + Err(QueryError::Structure( + 3, + [ + " (function_declaration (object))", // + " ^", + ] + .join("\n") + )) + ); }); } @@ -2322,37 +2356,92 @@ fn test_query_alternative_predicate_prefix() { fn test_query_step_is_definite() { struct Row { language: Language, + description: &'static str, pattern: &'static str, results_by_substring: &'static [(&'static str, bool)], } let rows = &[ Row { + description: "no definite steps", language: get_language("python"), pattern: r#"(expression_statement (string))"#, results_by_substring: &[("expression_statement", false), ("string", false)], }, Row { + description: "all definite steps", language: 
get_language("javascript"), - pattern: r#"(expression_statement (string))"#, - results_by_substring: &[("expression_statement", false), ("string", false)], + pattern: r#"(object "{" "}")"#, + results_by_substring: &[("object", false), ("{", true), ("}", true)], }, Row { + description: "an indefinite step that is optional", language: get_language("javascript"), - pattern: r#"(object "{" "}")"#, - results_by_substring: &[("object", false), ("{", true), ("}", true)], + pattern: r#"(object "{" (identifier)? @foo "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)?", false), + ("}", true), + ], + }, + Row { + description: "multiple indefinite steps that are optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)? @id1", false), + ("\",\"", false), + ("}", true), + ], }, Row { + description: "definite step after indefinite step", language: get_language("javascript"), pattern: r#"(pair (property_identifier) ":")"#, results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], }, Row { + description: "indefinite step in between two definite steps", + language: get_language("javascript"), + pattern: r#"(ternary_expression + condition: (_) + "?" 
+ consequence: (call_expression) + ":" + alternative: (_))"#, + results_by_substring: &[ + ("condition:", false), + ("\"?\"", false), + ("consequence:", false), + ("\":\"", true), + ("alternative:", true), + ], + }, + Row { + description: "one definite step after a repetition", language: get_language("javascript"), pattern: r#"(object "{" (_) "}")"#, - results_by_substring: &[("object", false), ("{", false), ("", false), ("}", true)], + results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)], }, Row { + description: "definite steps after multiple repetitions", + language: get_language("json"), + pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#, + results_by_substring: &[ + ("object", false), + ("{", false), + ("(pair) \",\" (pair)", false), + ("(pair) \",\" (_)", false), + ("\",\" (_)", false), + ("(_)", true), + ("}", true), + ], + }, + Row { + description: "a definite with a field", language: get_language("javascript"), pattern: r#"(binary_expression left: (identifier) right: (_))"#, results_by_substring: &[ @@ -2362,6 +2451,7 @@ fn test_query_step_is_definite() { ], }, Row { + description: "multiple definite steps with fields", language: get_language("javascript"), pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, results_by_substring: &[ @@ -2371,6 +2461,7 @@ fn test_query_step_is_definite() { ], }, Row { + description: "nesting, one definite step", language: get_language("javascript"), pattern: r#" (function_declaration @@ -2386,6 +2477,7 @@ fn test_query_step_is_definite() { ], }, Row { + description: "definite step after some deeply nested hidden nodes", language: get_language("ruby"), pattern: r#" (singleton_class @@ -2399,6 +2491,7 @@ fn test_query_step_is_definite() { ], }, Row { + description: "nesting, no definite steps", language: get_language("javascript"), pattern: r#" (call_expression @@ -2409,6 +2502,7 @@ fn test_query_step_is_definite() { results_by_substring: 
&[("property_identifier", false), ("template_string", false)], }, Row { + description: "a definite step after a nested node", language: get_language("javascript"), pattern: r#" (subscript_expression @@ -2424,6 +2518,7 @@ fn test_query_step_is_definite() { ], }, Row { + description: "a step that is indefinite due to a predicate", language: get_language("javascript"), pattern: r#" (subscript_expression @@ -2439,17 +2534,45 @@ fn test_query_step_is_definite() { ("[", true), ], }, + Row { + description: "alternation where one branch has definite steps", + language: get_language("javascript"), + pattern: r#" + [ + (unary_expression (identifier)) + (call_expression + function: (_) + arguments: (_)) + (binary_expression right:(call_expression)) + ] + "#, + results_by_substring: &[ + ("identifier", false), + ("right:", false), + ("function:", true), + ("arguments:", true), + ], + }, ]; allocations::record(|| { + eprintln!(""); + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); let query = Query::new(row.language, row.pattern).unwrap(); for (substring, is_definite) in row.results_by_substring { let offset = row.pattern.find(substring).unwrap(); assert_eq!( query.step_is_definite(offset), *is_definite, - "Pattern: {:?}, substring: {:?}, expected is_definite to be {}", + "Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}", + row.description, row.pattern .split_ascii_whitespace() .collect::>() diff --git a/lib/src/query.c b/lib/src/query.c index 60c892d348..8464a69170 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1144,34 +1144,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { next_states = _states; } - // A query step is definite if the containing pattern will definitely match - // once the step is reached. 
In other words, a step is *not* definite if - // it's possible to create a syntax node that matches up to until that step, - // but does not match the entire pattern. - uint32_t child_step_index = parent_step_index + 1; - QueryStep *child_step = &self->steps.contents[child_step_index]; - while (child_step->depth == parent_depth + 1) { - // Check if there is any way for the pattern to reach this step, but fail - // to reach the end of the sub-pattern. - for (unsigned k = 0; k < final_step_indices.size; k++) { - uint32_t final_step_index = final_step_indices.contents[k]; - if ( - final_step_index >= child_step_index && - self->steps.contents[final_step_index].depth == child_step->depth - ) { - child_step->is_definite = false; - break; - } + // Mark as indefinite any step where a match terminated. + // Later, this property will be propagated to all of the step's predecessors. + for (unsigned j = 0; j < final_step_indices.size; j++) { + uint32_t final_step_index = final_step_indices.contents[j]; + QueryStep *step = &self->steps.contents[final_step_index]; + if ( + step->depth != PATTERN_DONE_MARKER && + step->depth > parent_depth && + !step->is_dead_end + ) { + step->is_definite = false; } - - // Advance to the next child step in this sub-pattern. - do { - child_step_index++; - child_step++; - } while ( - child_step->depth != PATTERN_DONE_MARKER && - child_step->depth > parent_depth + 1 - ); } // If this pattern cannot match, store the pattern index so that it can be @@ -1187,9 +1171,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } - // In order for a step to be definite, all of its child steps must be definite, - // and all of its later sibling steps must be definite. Propagate any indefiniteness - // upward and backward through the pattern trees. + // Mark as indefinite any step with captures that are used in predicates. 
Array(uint16_t) predicate_capture_ids = array_new(); for (unsigned i = 0; i < self->patterns.size; i++) { QueryPattern *pattern = &self->patterns.contents[i]; @@ -1207,16 +1189,13 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } - bool all_later_children_definite = true; + // Find all of the steps that have these captures. for ( unsigned start = pattern->steps.offset, end = start + pattern->steps.length, - j = end - 1; j + 1 > start; j-- + j = start; j < end; j++ ) { QueryStep *step = &self->steps.contents[j]; - - // If this step has a capture that is used in a predicate, - // then it is not definite. for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { uint16_t capture_id = step->capture_ids[k]; if (capture_id == NONE) break; @@ -1227,10 +1206,41 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { break; } } + } + } + + // Propagate indefiniteness backwards. + bool done = self->steps.size == 0; + while (!done) { + done = true; + for (unsigned i = self->steps.size - 1; i > 0; i--) { + QueryStep *step = &self->steps.contents[i]; + + // Determine if this step is definite or has definite alternatives. + bool is_definite = false; + for (;;) { + if (step->is_definite) { + is_definite = true; + break; + } + if (step->alternative_index == NONE || step->alternative_index < i) { + break; + } + step = &self->steps.contents[step->alternative_index]; + } - // If a step is not definite, then none of its predecessors can be definite. - if (!all_later_children_definite) step->is_definite = false; - if (!step->is_definite) all_later_children_definite = false; + // If not, mark its predecessor as indefinite. 
+ if (!is_definite) { + QueryStep *prev_step = &self->steps.contents[i - 1]; + if ( + !prev_step->is_dead_end && + prev_step->depth != PATTERN_DONE_MARKER && + prev_step->is_definite + ) { + prev_step->is_definite = false; + done = false; + } + } } } @@ -1242,11 +1252,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { printf(" %u: DONE\n", i); } else { printf( - " %u: {symbol: %s, is_definite: %d}\n", + " %u: {symbol: %s, field: %s, is_definite: %d}\n", i, (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) ? "ANY" : ts_language_symbol_name(self->language, step->symbol), + (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), step->is_definite ); } @@ -1979,7 +1990,7 @@ bool ts_query_step_is_definite( uint32_t step_index = UINT32_MAX; for (unsigned i = 0; i < self->step_offsets.size; i++) { StepOffset *step_offset = &self->step_offsets.contents[i]; - if (step_offset->byte_offset >= byte_offset) break; + if (step_offset->byte_offset > byte_offset) break; step_index = step_offset->step_index; } if (step_index < self->steps.size) { diff --git a/script/test b/script/test index bcc88e24f4..31e9022610 100755 --- a/script/test +++ b/script/test @@ -83,10 +83,14 @@ done shift $(expr $OPTIND - 1) -if [[ -n $TREE_SITTER_TEST_LANGUAGE_FILTER || -n $TREE_SITTER_TEST_EXAMPLE_FILTER || -n $TREE_SITTER_TEST_TRIAL_FILTER ]]; then - top_level_filter=corpus -else - top_level_filter=$1 +top_level_filter=$1 + +if [[ \ + -n $TREE_SITTER_TEST_LANGUAGE_FILTER || \ + -n $TREE_SITTER_TEST_EXAMPLE_FILTER || \ + -n $TREE_SITTER_TEST_TRIAL_FILTER \ +]]; then + echo ${top_level_filter:=corpus} fi if [[ "${mode}" == "debug" ]]; then From 2eb04094f80048db6811e7238b8ed9b1f92c95ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 21 Aug 2020 14:12:04 -0700 Subject: [PATCH 153/282] Handle aliased parent nodes in query analysis --- cli/src/generate/render.rs | 221 ++++++++++++++++++++----------- 
cli/src/tests/query_test.rs | 8 ++ lib/include/tree_sitter/parser.h | 1 + lib/src/language.h | 27 ++++ lib/src/query.c | 80 ++++++----- 5 files changed, 225 insertions(+), 112 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 300ad38333..5b016cb645 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -7,7 +7,7 @@ use super::tables::{ }; use core::ops::Range; use std::cmp; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -69,7 +69,8 @@ struct Generator { symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, - alias_map: BTreeMap>, + unique_aliases: Vec, + symbol_map: HashMap, field_names: Vec, next_abi: bool, } @@ -108,6 +109,8 @@ impl Generator { self.add_alias_sequences(); } + self.add_non_terminal_alias_map(); + let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); self.add_lex_function("ts_lex", main_lex_table, true); @@ -159,13 +162,72 @@ impl Generator { format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) }; self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); } } } + self.unique_aliases = self + .alias_ids + .keys() + .filter(|alias| { + self.parse_table + .symbols + .iter() + .cloned() + .find(|symbol| { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + }) + .is_none() + }) + .cloned() + .collect(); + self.unique_aliases.sort_unstable(); + + self.symbol_map = self + .parse_table + .symbols + .iter() + .map(|symbol| { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. 
If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.simple_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.simple_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + mapping = other_symbol; + break; + } + } + } + + (*symbol, *mapping) + }) + .collect(); + field_names.sort_unstable(); field_names.dedup(); self.field_names = field_names.into_iter().cloned().collect(); @@ -255,11 +317,7 @@ impl Generator { "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!( - self, - "#define ALIAS_COUNT {}", - self.alias_map.iter().filter(|e| e.1.is_none()).count() - ); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -287,11 +345,9 @@ impl Generator { i += 1; } } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); - i += 1; - } + for alias in &self.unique_aliases { + add_line!(self, "{} = {},", self.alias_ids[&alias], i); + i += 1; } dedent!(self); add_line!(self, "}};"); @@ -310,15 +366,13 @@ impl Generator { ); add_line!(self, "[{}] = \"{}\",", 
self.symbol_ids[&symbol], name); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = \"{}\",", - self.alias_ids[&alias], - self.sanitize_string(&alias.value) - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = \"{}\",", + self.alias_ids[&alias], + self.sanitize_string(&alias.value) + ); } dedent!(self); add_line!(self, "}};"); @@ -329,58 +383,21 @@ impl Generator { add_line!(self, "static TSSymbol ts_symbol_map[] = {{"); indent!(self); for symbol in &self.parse_table.symbols { - let mut mapping = symbol; - - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.simple_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.simple_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; - } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - mapping = other_symbol; - break; - } - } - } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. 
Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; - } - } - } - add_line!( self, "[{}] = {},", - self.symbol_ids[&symbol], - self.symbol_ids[mapping], + self.symbol_ids[symbol], + self.symbol_ids[&self.symbol_map[symbol]], ); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = {},", - self.alias_ids[&alias], - self.alias_ids[&alias], - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = {},", + self.alias_ids[&alias], + self.alias_ids[&alias], + ); } dedent!(self); @@ -451,15 +468,13 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - for (alias, matching_symbol) in &self.alias_map { - if matching_symbol.is_none() { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); - indent!(self); - add_line!(self, ".visible = true,"); - add_line!(self, ".named = {},", alias.is_named); - dedent!(self); - add_line!(self, "}},"); - } + for alias in &self.unique_aliases { + add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + indent!(self); + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", alias.is_named); + dedent!(self); + add_line!(self, "}},"); } dedent!(self); add_line!(self, "}};"); @@ -498,6 +513,50 @@ impl Generator { add_line!(self, ""); } + fn add_non_terminal_alias_map(&mut self) { + let mut aliases_by_symbol = HashMap::new(); + for variable in &self.syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if let Some(alias) = &step.alias { + if step.symbol.is_non_terminal() + && !self.simple_aliases.contains_key(&step.symbol) + { + if self.symbol_ids.contains_key(&step.symbol) { + let alias_ids = + 
aliases_by_symbol.entry(step.symbol).or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias) { + alias_ids.insert(i, alias); + } + } + } + } + } + } + } + + let mut aliases_by_symbol = aliases_by_symbol.iter().collect::>(); + aliases_by_symbol.sort_unstable_by_key(|e| e.0); + + add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); + indent!(self); + for (symbol, aliases) in aliases_by_symbol { + let symbol_id = &self.symbol_ids[symbol]; + let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; + add_line!(self, "{}, {},", symbol_id, 1 + aliases.len()); + indent!(self); + add_line!(self, "{},", public_symbol_id); + for alias in aliases { + add_line!(self, "{},", &self.alias_ids[&alias]); + } + dedent!(self); + } + add_line!(self, "0,"); + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -1207,6 +1266,7 @@ impl Generator { add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); if self.next_abi { + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); add_line!(self, ".state_count = STATE_COUNT,"); } @@ -1517,7 +1577,8 @@ pub(crate) fn render_c_code( symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), - alias_map: BTreeMap::new(), + symbol_map: HashMap::new(), + unique_aliases: Vec::new(), field_names: Vec::new(), next_abi, } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 816c3aee69..822fdd2222 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -2553,6 +2553,14 @@ fn test_query_step_is_definite() { ("arguments:", true), ], }, + Row { + description: "aliased parent node", + language: get_language("ruby"), + pattern: r#" + (method_parameters "(" (identifier) @id")") + "#, + results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], + }, ]; allocations::record(|| { diff 
--git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 360e012f44..84096132c3 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -119,6 +119,7 @@ struct TSLanguage { const uint16_t *small_parse_table; const uint32_t *small_parse_table_map; const TSSymbol *public_symbol_map; + const uint16_t *alias_map; uint32_t state_count; }; diff --git a/lib/src/language.h b/lib/src/language.h index f8fd1ae5c0..e5c07aa2aa 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -13,6 +13,7 @@ extern "C" { #define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11 #define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11 #define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12 +#define TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP 12 typedef struct { const TSParseAction *actions; @@ -258,6 +259,32 @@ static inline void ts_language_field_map( *end = &self->field_map_entries[slice.index] + slice.length; } +static inline void ts_language_aliases_for_symbol( + const TSLanguage *self, + TSSymbol original_symbol, + const TSSymbol **start, + const TSSymbol **end +) { + *start = &self->public_symbol_map[original_symbol]; + *end = *start + 1; + + if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP) return; + + unsigned i = 0; + for (;;) { + TSSymbol symbol = self->alias_map[i++]; + if (symbol == 0 || symbol > original_symbol) break; + uint16_t count = self->alias_map[i++]; + if (symbol == original_symbol) { + *start = &self->alias_map[i]; + *end = &self->alias_map[i + count]; + break; + } + i += count; + } +} + + #ifdef __cplusplus } #endif diff --git a/lib/src/query.c b/lib/src/query.c index 8464a69170..9f91143823 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -788,24 +788,32 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { const TSParseAction *action = &lookahead_iterator.actions[i]; if (action->type == 
TSParseActionTypeReduce) { - TSSymbol symbol = self->language->public_symbol_map[action->params.reduce.symbol]; - array_search_sorted_by( - &subgraphs, - 0, - .symbol, - symbol, - &subgraph_index, - &exists + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + action->params.reduce.symbol, + &aliases, + &aliases_end ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { - array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { - .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, - .done = true, - })); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + 0, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { + array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { + .state = state, + .production_id = action->params.reduce.production_id, + .child_index = action->params.reduce.child_count, + .done = true, + })); + } } } } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { @@ -815,22 +823,30 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != state) { state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); - TSSymbol symbol = self->language->public_symbol_map[lookahead_iterator.symbol]; - array_search_sorted_by( - &subgraphs, - 0, - .symbol, - symbol, - &subgraph_index, - &exists + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + lookahead_iterator.symbol, + &aliases, + &aliases_end ); - if (exists) { 
- AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + 0, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) + array_push(&subgraph->start_states, state); + } } } } From 315f87bbff8b849734107cb6d0b8c66eea5d0276 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 24 Aug 2020 12:07:57 -0700 Subject: [PATCH 154/282] Remove unnecessary parameter from sorted array functions --- lib/src/array.h | 16 ++++++++-------- lib/src/query.c | 27 ++++++++++++--------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index 7fae7a4055..7b2d42fe44 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -87,23 +87,23 @@ extern "C" { #define _compare_int(a, b) ((int)*(a) - (int)(b)) -#define array_search_sorted_by(self, start, field, needle, index, exists) \ - array__search_sorted(self, start, _compare_int, field, needle, index, exists) +#define array_search_sorted_by(self, field, needle, index, exists) \ + array__search_sorted(self, 0, _compare_int, field, needle, index, exists) -#define array_search_sorted_with(self, start, compare, needle, index, exists) \ - array__search_sorted(self, start, compare, , needle, index, exists) +#define array_search_sorted_with(self, compare, needle, index, exists) \ + array__search_sorted(self, 0, compare, , needle, index, exists) -#define array_insert_sorted_by(self, start, field, value) \ +#define array_insert_sorted_by(self, field, value) \ do { \ unsigned index, exists; \ - array_search_sorted_by(self, start, field, (value) field, &index, 
&exists); \ + array_search_sorted_by(self, field, (value) field, &index, &exists); \ if (!exists) array_insert(self, index, value); \ } while (0) -#define array_insert_sorted_with(self, start, compare, value) \ +#define array_insert_sorted_with(self, compare, value) \ do { \ unsigned index, exists; \ - array_search_sorted_with(self, start, compare, &(value), &index, &exists); \ + array_search_sorted_with(self, compare, &(value), &index, &exists); \ if (!exists) array_insert(self, index, value); \ } while (0) diff --git a/lib/src/query.c b/lib/src/query.c index 9f91143823..8a7e5ea2db 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -764,12 +764,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { uint32_t parent_step_index = parent_step_indices.contents[i]; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; AnalysisSubgraph subgraph = { .symbol = parent_symbol }; - array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); + array_insert_sorted_by(&subgraphs, .symbol, subgraph); } for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { if (!ts_language_symbol_metadata(self->language, sym).visible) { AnalysisSubgraph subgraph = { .symbol = sym }; - array_insert_sorted_by(&subgraphs, 0, .symbol, subgraph); + array_insert_sorted_by(&subgraphs, .symbol, subgraph); } } @@ -798,7 +798,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { array_search_sorted_by( &subgraphs, - 0, .symbol, *symbol, &subgraph_index, @@ -833,7 +832,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { array_search_sorted_by( &subgraphs, - 0, .symbol, *symbol, &subgraph_index, @@ -882,8 +880,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { }; unsigned index, 
exists; array_search_sorted_with( - &subgraph->nodes, 0, - analysis_subgraph_node__compare, &predecessor_node, + &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node, &index, &exists ); if (!exists) { @@ -930,7 +927,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { uint16_t parent_depth = self->steps.contents[parent_step_index].depth; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); + array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) continue; AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; @@ -996,7 +993,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (next_states.size > 0) { int comparison = analysis_state__compare_position(state, array_back(&next_states)); if (comparison == 0) { - array_insert_sorted_with(&next_states, 0, analysis_state__compare, *state); + array_insert_sorted_with(&next_states, analysis_state__compare, *state); continue; } else if (comparison > 0) { while (j < states.size) { @@ -1014,7 +1011,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { const QueryStep * const step = &self->steps.contents[state->step_index]; unsigned subgraph_index, exists; - array_search_sorted_by(&subgraphs, 0, .symbol, parent_symbol, &subgraph_index, &exists); + array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) continue; const AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; @@ -1044,7 +1041,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { }; unsigned node_index; array_search_sorted_with( - &subgraph->nodes, 0, + &subgraph->nodes, analysis_subgraph_node__compare, &successor, &node_index, &exists ); @@ -1132,9 +1129,9 @@ static bool 
ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; if (did_finish_pattern) can_finish_pattern = true; if (next_state.depth > 0 && !did_finish_pattern) { - array_insert_sorted_with(&next_states, 0, analysis_state__compare, next_state); + array_insert_sorted_with(&next_states, analysis_state__compare, next_state); } else { - array_insert_sorted_by(&final_step_indices, 0, , next_state.step_index); + array_insert_sorted_by(&final_step_indices, , next_state.step_index); } } @@ -1180,7 +1177,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { assert(final_step_indices.size > 0); uint16_t *impossible_step_index = array_back(&final_step_indices); uint32_t i, exists; - array_search_sorted_by(&self->step_offsets, 0, .step_index, *impossible_step_index, &i, &exists); + array_search_sorted_by(&self->step_offsets, .step_index, *impossible_step_index, &i, &exists); assert(exists); *error_offset = self->step_offsets.contents[i].byte_offset; result = false; @@ -1201,7 +1198,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { ) { TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; if (step->type == TSQueryPredicateStepTypeCapture) { - array_insert_sorted_by(&predicate_capture_ids, 0, , step->value_id); + array_insert_sorted_by(&predicate_capture_ids, , step->value_id); } } @@ -1216,7 +1213,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { uint16_t capture_id = step->capture_ids[k]; if (capture_id == NONE) break; unsigned index, exists; - array_search_sorted_by(&predicate_capture_ids, 0, , capture_id, &index, &exists); + array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); if (exists) { step->is_definite = false; break; From 4aba684d6681278c82c3a472e0bead950da1ec9d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 24 Aug 2020 
15:53:05 -0700 Subject: [PATCH 155/282] Control recursion depth explicitly during query analysis --- cli/src/tests/query_test.rs | 49 +++++++++++++++++ lib/src/query.c | 107 ++++++++++++++++++++++++++++++------ 2 files changed, 138 insertions(+), 18 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 822fdd2222..b857467bef 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -268,6 +268,29 @@ fn test_query_errors_on_impossible_patterns() { .join("\n") )) ); + + assert_eq!( + Query::new(js_lang, "(identifier (identifier))",), + Err(QueryError::Structure( + 1, + [ + "(identifier (identifier))", // + " ^", + ] + .join("\n") + )) + ); + assert_eq!( + Query::new(js_lang, "(true (true))",), + Err(QueryError::Structure( + 1, + [ + "(true (true))", // + " ^", + ] + .join("\n") + )) + ); }); } @@ -2561,6 +2584,32 @@ fn test_query_step_is_definite() { "#, results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], }, + Row { + description: "long, but not too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", true), + ], + }, + Row { + description: "too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", false), + ], + }, ]; allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 8a7e5ea2db..85f71aa648 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -8,13 +8,14 @@ #include "./unicode.h" #include +// #define DEBUG_ANALYZE_QUERY // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) 
#define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_STATE_DEPTH 12 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -170,6 +171,8 @@ typedef struct { uint16_t step_index; } AnalysisState; +typedef Array(AnalysisState) AnalysisStateSet; + /* * AnalysisSubgraph - A subset of the states in the parse table that are used * in constructing nodes with a certain symbol. Each state is accompanied by @@ -585,6 +588,20 @@ static inline const TSStateId *state_predecessor_map_get( * AnalysisState ****************/ +static unsigned analysis_state__recursion_depth(const AnalysisState *self) { + unsigned result = 0; + for (unsigned i = 0; i < self->depth; i++) { + TSSymbol symbol = self->stack[i].parent_symbol; + for (unsigned j = 0; j < i; j++) { + if (self->stack[j].parent_symbol == symbol) { + result++; + break; + } + } + } + return result; +} + static inline int analysis_state__compare_position( const AnalysisState *self, const AnalysisState *other @@ -726,8 +743,6 @@ static inline void ts_query__pattern_map_insert( })); } -// #define DEBUG_ANALYZE_QUERY - static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Identify all of the patterns in the query that have child patterns, both at the // top level and nested within other larger patterns. Record the step index where @@ -917,23 +932,35 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // For each non-terminal pattern, determine if the pattern can successfully match, // and identify all of the possible children within the pattern where matching could fail. 
bool result = true; - typedef Array(AnalysisState) AnalysisStateList; - AnalysisStateList states = array_new(); - AnalysisStateList next_states = array_new(); + AnalysisStateSet states = array_new(); + AnalysisStateSet next_states = array_new(); + AnalysisStateSet deeper_states = array_new(); Array(uint16_t) final_step_indices = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { - // Find the subgraph that corresponds to this pattern's root symbol. uint16_t parent_step_index = parent_step_indices.contents[i]; uint16_t parent_depth = self->steps.contents[parent_step_index].depth; TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol; + if (parent_symbol == ts_builtin_sym_error) continue; + + // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's + // root symbols is not a non-terminal, then return an error. unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); - if (!exists) continue; - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if (!exists) { + unsigned first_child_step_index = parent_step_index + 1; + uint32_t i, exists; + array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists); + assert(exists); + *error_offset = self->step_offsets.contents[i].byte_offset; + result = false; + break; + } // Initialize an analysis state at every parse state in the table where // this parent symbol can occur. 
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; array_clear(&states); + array_clear(&deeper_states); for (unsigned j = 0; j < subgraph->start_states.size; j++) { TSStateId parse_state = subgraph->start_states.contents[j]; array_push(&states, ((AnalysisState) { @@ -954,6 +981,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. bool can_finish_pattern = false; + bool did_exceed_max_depth = false; + unsigned recursion_depth_limit = 0; + unsigned prev_final_step_count = 0; array_clear(&final_step_indices); for (;;) { #ifdef DEBUG_ANALYZE_QUERY @@ -980,7 +1010,23 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #endif - if (states.size == 0) break; + if (states.size == 0) { + if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); + #endif + + prev_final_step_count = final_step_indices.size; + recursion_depth_limit++; + AnalysisStateSet _states = states; + states = deeper_states; + deeper_states = _states; + continue; + } + + break; + } + array_clear(&next_states); for (unsigned j = 0; j < states.size; j++) { AnalysisState * const state = &states.contents[j]; @@ -1091,13 +1137,23 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // If this is a hidden child, then push a new entry to the stack, in order to // walk through the children of this child. 
- else if (sym >= self->language->token_count && next_state.depth < MAX_ANALYSIS_STATE_DEPTH) { + else if (sym >= self->language->token_count) { + if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { + did_exceed_max_depth = true; + continue; + } + next_state.depth++; analysis_state__top(&next_state)->parse_state = parse_state; analysis_state__top(&next_state)->child_index = 0; analysis_state__top(&next_state)->parent_symbol = sym; analysis_state__top(&next_state)->field_id = field_id; analysis_state__top(&next_state)->done = false; + + if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { + array_insert_sorted_with(&deeper_states, analysis_state__compare, next_state); + continue; + } } else { continue; } @@ -1128,10 +1184,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (!next_step->is_dead_end) { bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; if (did_finish_pattern) can_finish_pattern = true; - if (next_state.depth > 0 && !did_finish_pattern) { - array_insert_sorted_with(&next_states, analysis_state__compare, next_state); - } else { + if (did_finish_pattern || next_state.depth == 0) { array_insert_sorted_by(&final_step_indices, , next_state.step_index); + } else { + array_insert_sorted_with(&next_states, analysis_state__compare, next_state); } } @@ -1152,7 +1208,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } - AnalysisStateList _states = states; + AnalysisStateSet _states = states; states = next_states; next_states = _states; } @@ -1171,16 +1227,30 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } + if (did_exceed_max_depth) { + for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { + QueryStep *step = &self->steps.contents[j]; + if ( + step->depth <= parent_depth || + step->depth == PATTERN_DONE_MARKER + ) break; + if (!step->is_dead_end) { + 
step->is_definite = false; + } + } + } + // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. - if (result && !can_finish_pattern) { + if (result && !can_finish_pattern && !did_exceed_max_depth) { assert(final_step_indices.size > 0); - uint16_t *impossible_step_index = array_back(&final_step_indices); + uint16_t impossible_step_index = *array_back(&final_step_indices); uint32_t i, exists; - array_search_sorted_by(&self->step_offsets, .step_index, *impossible_step_index, &i, &exists); + array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); assert(exists); *error_offset = self->step_offsets.contents[i].byte_offset; result = false; + break; } } @@ -1286,6 +1356,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&next_nodes); array_delete(&states); array_delete(&next_states); + array_delete(&deeper_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); array_delete(&predicate_capture_ids); From 00c470ab2a9377a1ed382f0bb83ae87c3f18604c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Linse?= Date: Tue, 25 Aug 2020 19:34:44 +0200 Subject: [PATCH 156/282] Fix a few cases of Clang 10 with UBSAN detecting undefined behavior Clang 10 considers adding any offset, including 0, to the null pointer to be undefined behavior. `(void *)NULL + 0 = kaboom`. --- lib/src/lexer.c | 2 +- lib/src/query.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/src/lexer.c b/lib/src/lexer.c index 3f8a4c0ae8..a3c29544d3 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -73,7 +73,6 @@ static void ts_lexer__get_chunk(Lexer *self) { // code that spans the current position. 
static void ts_lexer__get_lookahead(Lexer *self) { uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; - const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; uint32_t size = self->chunk_size - position_in_chunk; if (size == 0) { @@ -82,6 +81,7 @@ static void ts_lexer__get_lookahead(Lexer *self) { return; } + const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8 ? ts_decode_utf8 : ts_decode_utf16; diff --git a/lib/src/query.c b/lib/src/query.c index 8c8bd4c354..b887b74ff6 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1252,6 +1252,9 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern( ) { Slice slice = self->predicates_by_pattern.contents[pattern_index]; *step_count = slice.length; + if (self->predicate_steps.contents == NULL) { + return NULL; + } return &self->predicate_steps.contents[slice.offset]; } From 04eacc44efc2d529fe48ed0b16d1b1c182376627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Linse?= Date: Wed, 26 Aug 2020 10:04:08 +0200 Subject: [PATCH 157/282] avoid warnings for implicit fallthrough in switch statements --- lib/src/parser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/src/parser.c b/lib/src/parser.c index 37d1a1c2fd..79cad797a0 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -292,6 +292,7 @@ static bool ts_parser__better_version_exists( return true; case ErrorComparisonPreferRight: if (ts_stack_can_merge(self->stack, i, version)) return true; + break; default: break; } @@ -975,6 +976,7 @@ static bool ts_parser__do_all_potential_reductions( .dynamic_precedence = action.params.reduce.dynamic_precedence, .production_id = action.params.reduce.production_id, }); + break; default: break; } From 45eab0ab24fdc391ffd69da99c09c0a2b2e2fbc3 Mon Sep 17 00:00:00 2001 From: TravonteD Date: Tue, 1 Sep 2020 22:34:45 -0400 Subject: [PATCH 158/282] add link to Fennel parser --- 
docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 8551d1ebcf..03c1a60cc5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,6 +34,7 @@ Parsers for these languages are fairly complete: * [Elm](https://github.com/razzeee/tree-sitter-elm) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) +- [Fennel](https://github.com/travonted/tree-sitter-fennel) * [Go](https://github.com/tree-sitter/tree-sitter-go) * [HTML](https://github.com/tree-sitter/tree-sitter-html) * [Java](https://github.com/tree-sitter/tree-sitter-java) From 4b9db41584069d4a6a0bc0776410212eb00820d5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Sep 2020 09:17:48 -0700 Subject: [PATCH 159/282] Remove unnecessary echo in test script --- script/test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/test b/script/test index 31e9022610..9b578dcf03 100755 --- a/script/test +++ b/script/test @@ -90,7 +90,7 @@ if [[ \ -n $TREE_SITTER_TEST_EXAMPLE_FILTER || \ -n $TREE_SITTER_TEST_TRIAL_FILTER \ ]]; then - echo ${top_level_filter:=corpus} + : ${top_level_filter:=corpus} fi if [[ "${mode}" == "debug" ]]; then From 31a22fc627e49003bf5410cebbda08808600b4ac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Sep 2020 09:59:26 -0700 Subject: [PATCH 160/282] In array.h, add comments and sort functions more logically --- lib/src/array.h | 95 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index 7b2d42fe44..de8c8cb381 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -12,9 +12,9 @@ extern "C" { #include #include "./alloc.h" -#define Array(T) \ - struct { \ - T *contents; \ +#define Array(T) \ + struct { \ + T *contents; \ uint32_t size; \ uint32_t capacity; \ } @@ -37,15 +37,15 @@ extern "C" { #define array_reserve(self, new_capacity) \ 
array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity) -#define array_erase(self, index) \ - array__erase((VoidArray *)(self), array__elem_size(self), index) - +// Free any memory allocated for this array. #define array_delete(self) array__delete((VoidArray *)self) #define array_push(self, element) \ (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \ (self)->contents[(self)->size++] = (element)) +// Increase the array's size by a given number of elements, reallocating +// if necessary. New elements are zero-initialized. #define array_grow_by(self, count) \ (array__grow((VoidArray *)(self), count, array__elem_size(self)), \ memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \ @@ -54,56 +54,61 @@ extern "C" { #define array_push_all(self, other) \ array_splice((self), (self)->size, 0, (other)->size, (other)->contents) +// Remove `old_count` elements from the array starting at the given `index`. At +// the same index, insert `new_count` new elements, reading their values from the +// `new_contents` pointer. #define array_splice(self, index, old_count, new_count, new_contents) \ array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \ new_count, new_contents) +// Insert one `element` into the array at the given `index`. #define array_insert(self, index, element) \ array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &element) +// Remove one `element` from the array at the given `index`. 
+#define array_erase(self, index) \ + array__erase((VoidArray *)(self), array__elem_size(self), index) + #define array_pop(self) ((self)->contents[--(self)->size]) #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) -#define array__search_sorted(self, start, compare, suffix, needle, index, exists) \ - do { \ - *(index) = start; \ - *(exists) = false; \ - uint32_t size = (self)->size - *(index); \ - if (size == 0) break; \ - int comparison; \ - while (size > 1) { \ - uint32_t half_size = size / 2; \ - uint32_t mid_index = *(index) + half_size; \ - comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) *(index) = mid_index; \ - size -= half_size; \ - } \ - comparison = compare(&((self)->contents[*(index)] suffix), (needle)); \ - if (comparison == 0) *(exists) = true; \ - else if (comparison < 0) *(index) += 1; \ - } while (0) - -#define _compare_int(a, b) ((int)*(a) - (int)(b)) +// Search a sorted array for a given `needle` value, using the given `compare` +// callback to determine the order. +// +// If an existing element is found to be equal to `needle`, then the `index` +// out-parameter is set to the existing value's index, and the `exists` +// out-parameter is set to true. Otherwise, `index` is set to an index where +// `needle` should be inserted in order to preserve the sorting, and `exists` +// is set to false. +#define array_search_sorted_with(self, compare, needle, index, exists) \ + array__search_sorted(self, 0, compare, , needle, index, exists) +// Search a sorted array for a given `needle` value, using integer comparisons +// of a given struct field (specified with a leading dot) to determine the order. +// +// See also `array_search_sorted_with`. 
#define array_search_sorted_by(self, field, needle, index, exists) \ array__search_sorted(self, 0, _compare_int, field, needle, index, exists) -#define array_search_sorted_with(self, compare, needle, index, exists) \ - array__search_sorted(self, 0, compare, , needle, index, exists) - -#define array_insert_sorted_by(self, field, value) \ +// Insert a given `value` into a sorted array, using the given `compare` +// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ do { \ unsigned index, exists; \ - array_search_sorted_by(self, field, (value) field, &index, &exists); \ + array_search_sorted_with(self, compare, &(value), &index, &exists); \ if (!exists) array_insert(self, index, value); \ } while (0) -#define array_insert_sorted_with(self, compare, value) \ +// Insert a given `value` into a sorted array, using integer comparisons of +// a given struct field (specified with a leading dot) to determine the order. +// +// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ do { \ unsigned index, exists; \ - array_search_sorted_with(self, compare, &(value), &index, &exists); \ + array_search_sorted_by(self, field, (value) field, &index, &exists); \ if (!exists) array_insert(self, index, value); \ } while (0) @@ -192,6 +197,30 @@ static inline void array__splice(VoidArray *self, size_t element_size, self->size += new_count - old_count; } +// A binary search routine, based on Rust's `std::slice::binary_search_by`. 
+#define array__search_sorted(self, start, compare, suffix, needle, index, exists) \ + do { \ + *(index) = start; \ + *(exists) = false; \ + uint32_t size = (self)->size - *(index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(index)] suffix), (needle)); \ + if (comparison == 0) *(exists) = true; \ + else if (comparison < 0) *(index) += 1; \ + } while (0) + +// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +// parameter by reference in order to work with the generic sorting function above. +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + #ifdef __cplusplus } #endif From 6256110bd2126861014a409aa3da2c9b7d64bc06 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 2 Sep 2020 13:31:29 -0400 Subject: [PATCH 161/282] simplest changes, just error in the build --- cli/build.rs | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index 0ed9ef067d..80692a9e4d 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { @@ -6,12 +6,45 @@ fn main() { println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); } + if let Some(missing) = wasm_files_present() { + println!("error: couldn't find required wasm binding file {}", missing.display()); + println!("Have you run `script/build-wasm?`"); + std::process::exit(1); + } + println!( "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); } +#[cfg(unix)] +fn required_files() -> std::vec::Vec<&'static Path> { + return vec![ + Path::new("../cli/src/web_ui.html"), + Path::new("../docs/assets/js/playground.js"), + 
Path::new("../lib/binding_web/tree-sitter.js"), + Path::new("../lib/binding_web/tree-sitter.wasm") + ]; +} + +#[cfg(windows)] +fn required_files() -> std::vec::Vec<&'static Path> { + return vec![ + Path::new("../cli/src/web_ui.html"), + Path::new("../docs/assets/js/playground.js"), + ]; +} + +fn wasm_files_present() -> Option<&'static Path> { + for path in required_files() { + if !path.exists() { + return Some(path) + } + } + return None +} + fn read_git_sha() -> Option { let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -51,7 +84,6 @@ fn read_git_sha() -> Option { } return fs::read_to_string(&ref_filename).ok(); } - // If we're on a detached commit, then the `HEAD` file itself contains the sha. else if head_content.len() == 40 { return Some(head_content); From 36a8821f3ed5460c72876da51ad4569ba79dbe7f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Sep 2020 12:03:46 -0700 Subject: [PATCH 162/282] Fix behavior of the last child operator in tree queries --- cli/src/tests/query_test.rs | 35 +++++++++++ lib/src/query.c | 10 +-- lib/src/tree_cursor.c | 117 ++++++++++++++++++++++++------------ lib/src/tree_cursor.h | 2 +- 4 files changed, 121 insertions(+), 43 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index b857467bef..f3521bb567 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -667,6 +667,41 @@ fn test_query_matches_with_immediate_siblings() { (2, vec![("first-element", "1")]), ], ); + + let query = Query::new( + language, + " + (block . (_) @first-stmt) + (block (_) @stmt) + (block (_) @last-stmt .) 
+ ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + if a: + b() + c() + if d(): e(); f() + g() + ", + &[ + (0, vec![("first-stmt", "b()")]), + (1, vec![("stmt", "b()")]), + (1, vec![("stmt", "c()")]), + (1, vec![("stmt", "if d(): e(); f()")]), + (0, vec![("first-stmt", "e()")]), + (1, vec![("stmt", "e()")]), + (1, vec![("stmt", "f()")]), + (2, vec![("last-stmt", "f()")]), + (1, vec![("stmt", "g()")]), + (2, vec![("last-stmt", "g()")]), + ], + ); + }); } diff --git a/lib/src/query.c b/lib/src/query.c index b629af51e8..45aa387761 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2549,11 +2549,13 @@ static inline bool ts_query_cursor__advance( if (symbol != ts_builtin_sym_error && self->query->symbol_map) { symbol = self->query->symbol_map[symbol]; } - bool can_have_later_siblings; + bool has_later_siblings; + bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; TSFieldId field_id = ts_tree_cursor_current_status( &self->cursor, - &can_have_later_siblings, + &has_later_siblings, + &has_later_named_siblings, &can_have_later_siblings_with_this_field ); LOG( @@ -2613,11 +2615,11 @@ static inline bool ts_query_cursor__advance( step->symbol == symbol || step->symbol == WILDCARD_SYMBOL || (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); - bool later_sibling_can_match = can_have_later_siblings; + bool later_sibling_can_match = has_later_siblings; if ((step->is_immediate && is_named) || state->seeking_immediate_match) { later_sibling_can_match = false; } - if (step->is_last_child && can_have_later_siblings) { + if (step->is_last_child && has_later_named_siblings) { node_does_match = false; } if (step->field) { diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 06c724d282..b193a75450 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -244,14 +244,18 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { ); } +// Private - Get various facts about the current node that are needed +// 
when executing tree queries. TSFieldId ts_tree_cursor_current_status( const TSTreeCursor *_self, - bool *can_have_later_siblings, + bool *has_later_siblings, + bool *has_later_named_siblings, bool *can_have_later_siblings_with_this_field ) { const TreeCursor *self = (const TreeCursor *)_self; TSFieldId result = 0; - *can_have_later_siblings = false; + *has_later_siblings = false; + *has_later_named_siblings = false; *can_have_later_siblings_with_this_field = false; // Walk up the tree, visiting the current node and its invisible ancestors, @@ -260,49 +264,86 @@ TSFieldId ts_tree_cursor_current_status( TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - // Stop walking up when a visible ancestor is found. - if (i != self->stack.size - 1) { - if (ts_subtree_visible(*entry->subtree)) break; - if ( - !ts_subtree_extra(*entry->subtree) && - ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ) - ) break; - } - - if (ts_subtree_child_count(*parent_entry->subtree) > entry->child_index + 1) { - *can_have_later_siblings = true; - } - - if (ts_subtree_extra(*entry->subtree)) break; - - const TSFieldMapEntry *field_map, *field_map_end; - ts_language_field_map( + const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->production_id, - &field_map, &field_map_end + parent_entry->subtree->ptr->production_id ); - // Look for a field name associated with the current node. - if (!result) { - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (!i->inherited && i->child_index == entry->structural_child_index) { - result = i->field_id; - *can_have_later_siblings_with_this_field = false; - break; + // If the subtree is visible, return its public-facing symbol. + // Otherwise, return zero. 
+ #define subtree_visible_symbol(subtree, structural_child_index) \ + (( \ + !ts_subtree_extra(subtree) && \ + alias_sequence && \ + alias_sequence[structural_child_index] \ + ) ? \ + alias_sequence[structural_child_index] : \ + ts_subtree_visible(subtree) ? \ + ts_subtree_symbol(subtree) : \ + 0) \ + + // Stop walking up when a visible ancestor is found. + if ( + i != self->stack.size - 1 && + subtree_visible_symbol(*entry->subtree, entry->structural_child_index) + ) break; + + // Determine if the current node has later siblings. + if (!*has_later_siblings) { + unsigned sibling_count = parent_entry->subtree->ptr->child_count; + unsigned structural_child_index = entry->structural_child_index; + if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; + for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { + Subtree sibling = parent_entry->subtree->ptr->children[j]; + if (ts_subtree_visible_child_count(sibling) > 0) { + *has_later_siblings = true; + if (*has_later_named_siblings) break; + if (sibling.ptr->named_child_count > 0) { + *has_later_named_siblings = true; + break; + } } + TSSymbol visible_symbol = subtree_visible_symbol(sibling, structural_child_index); + if (visible_symbol) { + *has_later_siblings = true; + if (*has_later_named_siblings) break; + if (ts_language_symbol_metadata(self->tree->language, visible_symbol).named) { + *has_later_named_siblings = true; + break; + } + } + if (!ts_subtree_extra(sibling)) structural_child_index++; } } - // Determine if there other later siblings with the same field name. 
- if (result) { - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == result && i->child_index > entry->structural_child_index) { - *can_have_later_siblings_with_this_field = true; - break; + #undef subtree_visible_symbol + + if (!ts_subtree_extra(*entry->subtree)) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self->tree->language, + parent_entry->subtree->ptr->production_id, + &field_map, &field_map_end + ); + + // Look for a field name associated with the current node. + if (!result) { + for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + if (!i->inherited && i->child_index == entry->structural_child_index) { + result = i->field_id; + *can_have_later_siblings_with_this_field = false; + break; + } + } + } + + // Determine if the current node can have later siblings with the same field name. + if (result) { + for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + if (i->field_id == result && i->child_index > entry->structural_child_index) { + *can_have_later_siblings_with_this_field = true; + break; + } } } } diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 5a39dd278c..0bb486d738 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -16,6 +16,6 @@ typedef struct { } TreeCursor; void ts_tree_cursor_init(TreeCursor *, TSNode); -TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *); +TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *, bool *); #endif // TREE_SITTER_TREE_CURSOR_H_ From 92a17e782f9f785193de204e3dfc3997e47c9ee0 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 2 Sep 2020 15:29:49 -0400 Subject: [PATCH 163/282] Conditionally compile with a cfg variable instead. 
--- cli/build.rs | 19 +++++++++---------- cli/src/web_ui.rs | 21 ++++++++++++++++++++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index 80692a9e4d..cb7421aa9a 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,15 +1,14 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; +use std::vec::Vec; fn main() { if let Some(git_sha) = read_git_sha() { println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); } - if let Some(missing) = wasm_files_present() { - println!("error: couldn't find required wasm binding file {}", missing.display()); - println!("Have you run `script/build-wasm?`"); - std::process::exit(1); + if wasm_files_present() { + println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); } println!( @@ -19,7 +18,7 @@ fn main() { } #[cfg(unix)] -fn required_files() -> std::vec::Vec<&'static Path> { +fn required_files() -> Vec<&'static Path> { return vec![ Path::new("../cli/src/web_ui.html"), Path::new("../docs/assets/js/playground.js"), @@ -29,20 +28,20 @@ fn required_files() -> std::vec::Vec<&'static Path> { } #[cfg(windows)] -fn required_files() -> std::vec::Vec<&'static Path> { +fn required_files() -> Vec<&'static Path> { return vec![ Path::new("../cli/src/web_ui.html"), Path::new("../docs/assets/js/playground.js"), ]; } -fn wasm_files_present() -> Option<&'static Path> { +fn wasm_files_present() -> bool { for path in required_files() { - if !path.exists() { - return Some(path) + if path.exists() { + return false } } - return None + return true } fn read_git_sha() -> Option { diff --git a/cli/src/web_ui.rs b/cli/src/web_ui.rs index 7d4c7eec23..0c0c161fda 100644 --- a/cli/src/web_ui.rs +++ b/cli/src/web_ui.rs @@ -10,6 +10,16 @@ use webbrowser; macro_rules! 
resource { ($name: tt, $path: tt) => { + #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] + fn $name(tree_sitter_dir: &Option) -> Vec { + if let Some(tree_sitter_dir) = tree_sitter_dir { + fs::read(tree_sitter_dir.join($path)).unwrap() + } else { + include_bytes!(concat!("../../", $path)).to_vec() + } + } + + #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -22,7 +32,7 @@ macro_rules! resource { macro_rules! posix_resource { ($name: tt, $path: tt) => { - #[cfg(unix)] + #[cfg(all(unix, TREE_SITTER_EMBED_WASM_BINDING))] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -31,6 +41,15 @@ macro_rules! posix_resource { } } + #[cfg(all(unix, not(TREE_SITTER_EMBED_WASM_BINDING)))] + fn $name(tree_sitter_dir: &Option) -> Vec { + if let Some(tree_sitter_dir) = tree_sitter_dir { + fs::read(tree_sitter_dir.join($path)).unwrap() + } else { + Vec::new() + } + } + #[cfg(windows)] fn $name(_: &Option) -> Vec { Vec::new() From 85cdf3dc4900ea6940d8e7b9c1f49a70cda7e3a8 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 2 Sep 2020 15:37:21 -0400 Subject: [PATCH 164/282] adjust docs --- docs/section-6-contributing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-6-contributing.md b/docs/section-6-contributing.md index 1ebe50b340..690f38f6a1 100644 --- a/docs/section-6-contributing.md +++ b/docs/section-6-contributing.md @@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter cd tree-sitter ``` -Build the WASM library. We do this first because it gets embedded in the CLI to enable the `web-ui` command. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker: +To use the `web-ui` command, you'll need to build the WASM library. 
If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker: ```sh ./script/build-wasm From 660fe8630085afde9f7c1b8d844a4bc6d4a8d42e Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 8 Sep 2020 09:15:00 -0400 Subject: [PATCH 165/282] web_ui and playground.js are always there. --- cli/build.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index cb7421aa9a..4c6b76da1d 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -20,8 +20,6 @@ fn main() { #[cfg(unix)] fn required_files() -> Vec<&'static Path> { return vec![ - Path::new("../cli/src/web_ui.html"), - Path::new("../docs/assets/js/playground.js"), Path::new("../lib/binding_web/tree-sitter.js"), Path::new("../lib/binding_web/tree-sitter.wasm") ]; @@ -30,8 +28,6 @@ fn required_files() -> Vec<&'static Path> { #[cfg(windows)] fn required_files() -> Vec<&'static Path> { return vec![ - Path::new("../cli/src/web_ui.html"), - Path::new("../docs/assets/js/playground.js"), ]; } From 865f59ad745a579b00da5c6851229c619e798702 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 8 Sep 2020 09:23:54 -0400 Subject: [PATCH 166/282] No need for platformish logic in build.rs. 
--- cli/build.rs | 9 +-------- cli/src/web_ui.rs | 9 ++------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index 4c6b76da1d..983c5f9574 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -17,7 +17,6 @@ fn main() { ); } -#[cfg(unix)] fn required_files() -> Vec<&'static Path> { return vec![ Path::new("../lib/binding_web/tree-sitter.js"), @@ -25,15 +24,9 @@ fn required_files() -> Vec<&'static Path> { ]; } -#[cfg(windows)] -fn required_files() -> Vec<&'static Path> { - return vec![ - ]; -} - fn wasm_files_present() -> bool { for path in required_files() { - if path.exists() { + if !path.exists() { return false } } diff --git a/cli/src/web_ui.rs b/cli/src/web_ui.rs index 0c0c161fda..ac544c7515 100644 --- a/cli/src/web_ui.rs +++ b/cli/src/web_ui.rs @@ -32,7 +32,7 @@ macro_rules! resource { macro_rules! posix_resource { ($name: tt, $path: tt) => { - #[cfg(all(unix, TREE_SITTER_EMBED_WASM_BINDING))] + #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -41,7 +41,7 @@ macro_rules! posix_resource { } } - #[cfg(all(unix, not(TREE_SITTER_EMBED_WASM_BINDING)))] + #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -49,11 +49,6 @@ macro_rules! posix_resource { Vec::new() } } - - #[cfg(windows)] - fn $name(_: &Option) -> Vec { - Vec::new() - } }; } From 2a1bd3dbc26b82799ae10c7e24ef3a5a17e8f151 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 8 Sep 2020 09:25:38 -0400 Subject: [PATCH 167/282] Better naming. --- cli/src/web_ui.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/web_ui.rs b/cli/src/web_ui.rs index ac544c7515..9b29a73a0f 100644 --- a/cli/src/web_ui.rs +++ b/cli/src/web_ui.rs @@ -30,7 +30,7 @@ macro_rules! 
resource { }; } -macro_rules! posix_resource { +macro_rules! optional_resource { ($name: tt, $path: tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] fn $name(tree_sitter_dir: &Option) -> Vec { @@ -54,8 +54,8 @@ macro_rules! posix_resource { resource!(get_main_html, "cli/src/web_ui.html"); resource!(get_playground_js, "docs/assets/js/playground.js"); -posix_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); -posix_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); +optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); +optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); pub fn serve(grammar_path: &Path, open_in_browser: bool) { let port = get_available_port().expect("Couldn't find an available port"); From ff488f89c93aad767b46c4fe08a20e5fd26b54dc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Sep 2020 10:58:20 -0700 Subject: [PATCH 168/282] Make the --prev-abi flag work w/ the newest abi change --- cli/src/generate/mod.rs | 15 ++++----------- cli/src/generate/render.rs | 4 +++- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 12a59e1bb4..830c4a65b4 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -31,16 +31,9 @@ lazy_static! 
{ .unwrap(); } -const NEW_HEADER_PARTS: [&'static str; 2] = [ - " - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSSymbol *public_symbol_map;", - " -#define SMALL_STATE(id) id - LARGE_STATE_COUNT -", -]; +const NEW_HEADER_PARTS: &[&'static str] = &[" + const uint16_t *alias_map; + uint32_t state_count;"]; struct GeneratedParser { c_code: String, @@ -101,7 +94,7 @@ pub fn generate_parser_in_directory( } else { let mut header = tree_sitter::PARSER_HEADER.to_string(); - for part in &NEW_HEADER_PARTS { + for part in NEW_HEADER_PARTS.iter() { let pos = header .find(part) .expect("Missing expected part of parser.h header"); diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 5b016cb645..2758eb58f2 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -109,7 +109,9 @@ impl Generator { self.add_alias_sequences(); } - self.add_non_terminal_alias_map(); + if self.next_abi { + self.add_non_terminal_alias_map(); + } let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); From 71f5908806364011d912224fb44ea81c8fad6474 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 15 Sep 2020 13:08:03 -0400 Subject: [PATCH 169/282] Max's suggestions. 
--- cli/build.rs | 19 ++++++------------- docs/section-6-contributing.md | 2 +- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index 983c5f9574..deba438167 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -17,20 +17,13 @@ fn main() { ); } -fn required_files() -> Vec<&'static Path> { - return vec![ - Path::new("../lib/binding_web/tree-sitter.js"), - Path::new("../lib/binding_web/tree-sitter.wasm") - ]; -} - fn wasm_files_present() -> bool { - for path in required_files() { - if !path.exists() { - return false - } - } - return true + let paths = [ + "../lib/binding_web/tree-sitter.js", + "../lib/binding_web/tree-sitter.wasm", + ]; + + return paths.iter().all(|p| Path::new(p).exists()) } fn read_git_sha() -> Option { diff --git a/docs/section-6-contributing.md b/docs/section-6-contributing.md index 690f38f6a1..4ccaddea30 100644 --- a/docs/section-6-contributing.md +++ b/docs/section-6-contributing.md @@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter cd tree-sitter ``` -To use the `web-ui` command, you'll need to build the WASM library. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker: +Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. 
Otherwise, it will use Docker: ```sh ./script/build-wasm From 931d0c26007343b87c7dcfd4bc15c27d086ac3fe Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 15 Sep 2020 13:22:22 -0400 Subject: [PATCH 170/282] fix warning and use implicit return here --- cli/build.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cli/build.rs b/cli/build.rs index deba438167..47506018a0 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,6 +1,5 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; -use std::vec::Vec; fn main() { if let Some(git_sha) = read_git_sha() { @@ -23,7 +22,7 @@ fn wasm_files_present() -> bool { "../lib/binding_web/tree-sitter.wasm", ]; - return paths.iter().all(|p| Path::new(p).exists()) + paths.iter().all(|p| Path::new(p).exists()) } fn read_git_sha() -> Option { From 43a3f1bbe05387cb692588bd1bd7ebbe6047856a Mon Sep 17 00:00:00 2001 From: Elijah Mooring <45398751+Vehmloewff@users.noreply.github.com> Date: Sat, 19 Sep 2020 11:19:26 -0500 Subject: [PATCH 171/282] Update section-3-creating-parsers.md --- docs/section-3-creating-parsers.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 694f8daeae..4d0befcfbc 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -210,6 +210,7 @@ The following is a complete list of built-in functions you can use in your `gram * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. * **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. 
In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. * **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. +* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. 
From b5a9adb555bb0db783cd12070dcab392df1cf823 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Sep 2020 12:34:48 -0700 Subject: [PATCH 172/282] Allow queries to match on supertypes Co-authored-by: Ayman Nadeem --- .../build_tables/minimize_parse_table.rs | 1 + cli/src/generate/render.rs | 3 + cli/src/tests/query_test.rs | 40 +++++++++- lib/include/tree_sitter/parser.h | 1 + lib/src/language.c | 2 +- lib/src/query.c | 79 +++++++++++++++---- lib/src/tree_cursor.c | 44 +++++++++++ lib/src/tree_cursor.h | 1 + 8 files changed, 153 insertions(+), 18 deletions(-) diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index aa4801c8af..d159a2c4fc 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -68,6 +68,7 @@ impl<'a> Minimizer<'a> { .. } => { if !self.simple_aliases.contains_key(&symbol) + && !self.syntax_grammar.supertype_symbols.contains(&symbol) && !aliased_symbols.contains(&symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 2758eb58f2..f33539d690 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -460,6 +460,9 @@ impl Generator { VariableType::Hidden => { add_line!(self, ".visible = false,"); add_line!(self, ".named = true,"); + if self.syntax_grammar.supertype_symbols.contains(symbol) { + add_line!(self, ".supertype = true,"); + } } VariableType::Auxiliary => { add_line!(self, ".visible = false,"); diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index f3521bb567..900b7be1d0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -701,7 +701,6 @@ fn test_query_matches_with_immediate_siblings() { (2, vec![("last-stmt", "g()")]), ], ); - }); } @@ -1395,6 +1394,45 @@ fn test_query_matches_with_anonymous_tokens() { }); } +#[test] +fn 
test_query_matches_with_supertypes() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + r#" + ((_simple_statement) @before . (_simple_statement) @after) + + (assignment + left: (left_hand_side (identifier) @def)) + + (_primary_expression/identifier) @ref + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b + print c + if d: print e.f; print g.h.i + ", + &[ + (1, vec![("def", "a")]), + (2, vec![("ref", "b")]), + (0, vec![("before", "a = b"), ("after", "print c")]), + (2, vec![("ref", "c")]), + (2, vec![("ref", "d")]), + (2, vec![("ref", "e")]), + (0, vec![("before", "print e.f"), ("after", "print g.h.i")]), + (2, vec![("ref", "g")]), + ], + ); + }); +} + #[test] fn test_query_matches_within_byte_range() { allocations::record(|| { diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 84096132c3..c5a788ff64 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -35,6 +35,7 @@ typedef uint16_t TSStateId; typedef struct { bool visible : 1; bool named : 1; + bool supertype: 1; } TSSymbolMetadata; typedef struct TSLexer TSLexer; diff --git a/lib/src/language.c b/lib/src/language.c index c00c49e3c0..9ccf2bc369 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -89,7 +89,7 @@ TSSymbol ts_language_symbol_for_name( uint32_t count = ts_language_symbol_count(self); for (TSSymbol i = 0; i < count; i++) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); - if (!metadata.visible || metadata.named != is_named) continue; + if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { diff --git a/lib/src/query.c b/lib/src/query.c index 45aa387761..0ca03782b3 100644 --- a/lib/src/query.c +++ 
b/lib/src/query.c @@ -47,6 +47,7 @@ typedef struct { */ typedef struct { TSSymbol symbol; + TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; uint16_t alternative_index; @@ -1626,14 +1627,9 @@ static TSQueryError ts_query__parse_pattern( else { TSSymbol symbol; - // Parse the wildcard symbol - if ( - stream->next == '_' || - - // TODO - remove. - // For temporary backward compatibility, handle '*' as a wildcard. - stream->next == '*' - ) { + // TODO - remove. + // For temporary backward compatibility, handle '*' as a wildcard. + if (stream->next == '*') { symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; stream_advance(stream); } @@ -1651,25 +1647,61 @@ static TSQueryError ts_query__parse_pattern( return ts_query__parse_predicate(self, stream); } - symbol = ts_language_symbol_for_name( + // Parse the wildcard symbol + else if (length == 1 && node_name[0] == '_') { + symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; + } + + else { + symbol = ts_language_symbol_for_name( + self->language, + node_name, + length, + true + ); + if (!symbol) { + stream_reset(stream, node_name); + return TSQueryErrorNodeType; + } + } + } else { + return TSQueryErrorSyntax; + } + + // Add a step for the node. 
+ array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + if (ts_language_symbol_metadata(self->language, symbol).supertype) { + QueryStep *step = array_back(&self->steps); + step->supertype_symbol = step->symbol; + step->symbol = WILDCARD_SYMBOL; + } + + stream_skip_whitespace(stream); + + if (stream->next == '/') { + stream_advance(stream); + if (!stream_is_ident_start(stream)) { + return TSQueryErrorSyntax; + } + + const char *node_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - node_name; + + QueryStep *step = array_back(&self->steps); + step->symbol = ts_language_symbol_for_name( self->language, node_name, length, true ); - if (!symbol) { + if (!step->symbol) { stream_reset(stream, node_name); return TSQueryErrorNodeType; } - } else { - return TSQueryErrorSyntax; } - // Add a step for the node. - array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - // Parse the child patterns - stream_skip_whitespace(stream); bool child_is_immediate = false; uint16_t child_start_step_index = self->steps.size; for (;;) { @@ -2622,6 +2654,21 @@ static inline bool ts_query_cursor__advance( if (step->is_last_child && has_later_named_siblings) { node_does_match = false; } + if (step->supertype_symbol) { + bool has_supertype = ts_tree_cursor_has_supertype(&self->cursor, step->supertype_symbol); + + if (symbol == 1) { + LOG( + " has supertype %s: %d", + ts_language_symbol_name(self->query->language, step->supertype_symbol), + has_supertype + ); + } + + if (!has_supertype) { + node_does_match = false; + } + } if (step->field) { if (step->field == field_id) { if (!can_have_later_siblings_with_this_field) { diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index b193a75450..8ef17960aa 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -352,6 +352,50 @@ TSFieldId ts_tree_cursor_current_status( return result; } +bool ts_tree_cursor_has_supertype( + const TSTreeCursor *_self, + 
TSSymbol supertype_symbol +) { + const TreeCursor *self = (const TreeCursor *)_self; + + // Walk up the tree, visiting the current node and its invisible ancestors, + // because fields can refer to nodes through invisible *wrapper* nodes, + for (unsigned i = self->stack.size - 1; i > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->production_id + ); + + // If the subtree is visible, return its public-facing symbol. + // Otherwise, return zero. + #define subtree_visible_symbol(subtree, structural_child_index) \ + (( \ + !ts_subtree_extra(subtree) && \ + alias_sequence && \ + alias_sequence[structural_child_index] \ + ) ? \ + alias_sequence[structural_child_index] : \ + ts_subtree_visible(subtree) ? \ + ts_subtree_symbol(subtree) : \ + 0) \ + + // Stop walking up when a visible ancestor is found. 
+ if ( + i != self->stack.size - 1 && + subtree_visible_symbol(*entry->subtree, entry->structural_child_index) + ) break; + + if (ts_subtree_symbol(*entry->subtree) == supertype_symbol) { + return true; + } + } + + return false; +} + TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 0bb486d738..7829e8b94f 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -17,5 +17,6 @@ typedef struct { void ts_tree_cursor_init(TreeCursor *, TSNode); TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *, bool *); +bool ts_tree_cursor_has_supertype(const TSTreeCursor *, TSSymbol); #endif // TREE_SITTER_TREE_CURSOR_H_ From 8835dfda99d838c5985baa4b30234f6249515019 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Sep 2020 13:11:54 -0700 Subject: [PATCH 173/282] Fix test for supertypes in queries --- cli/src/tests/query_test.rs | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 900b7be1d0..b9b2811754 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1401,12 +1401,15 @@ fn test_query_matches_with_supertypes() { let query = Query::new( language, r#" - ((_simple_statement) @before . 
(_simple_statement) @after) + (argument_list (_expression) @arg) + + (keyword_argument + value: (_expression) @kw_arg) (assignment - left: (left_hand_side (identifier) @def)) + left: (left_hand_side (identifier) @var_def)) - (_primary_expression/identifier) @ref + (_primary_expression/identifier) @var_ref "#, ) .unwrap(); @@ -1415,19 +1418,19 @@ fn test_query_matches_with_supertypes() { language, &query, " - a = b - print c - if d: print e.f; print g.h.i + a = b.c( + [d], + # a comment + e=f + ) ", &[ - (1, vec![("def", "a")]), - (2, vec![("ref", "b")]), - (0, vec![("before", "a = b"), ("after", "print c")]), - (2, vec![("ref", "c")]), - (2, vec![("ref", "d")]), - (2, vec![("ref", "e")]), - (0, vec![("before", "print e.f"), ("after", "print g.h.i")]), - (2, vec![("ref", "g")]), + (2, vec![("var_def", "a")]), + (3, vec![("var_ref", "b")]), + (0, vec![("arg", "[d]")]), + (3, vec![("var_ref", "d")]), + (1, vec![("kw_arg", "f")]), + (3, vec![("var_ref", "f")]), ], ); }); From a8d77001c247fbfc3dddfb24d5eeb13274d2b751 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Sep 2020 13:20:50 -0700 Subject: [PATCH 174/282] Update c error recovery test to reflect behavior change --- test/fixtures/error_corpus/c_errors.txt | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index 4d0c8e8bcb..b873324535 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -158,9 +158,17 @@ int a() { (translation_unit (function_definition (primitive_type) - (function_declarator (identifier) (parameter_list)) + (function_declarator + (identifier) + (parameter_list)) (compound_statement - (struct_specifier (type_identifier)) - (ERROR (number_literal)) - (primitive_type) - (ERROR (number_literal))))) + (declaration + (struct_specifier (type_identifier)) + (init_declarator + (MISSING identifier) + (number_literal))) + (declaration + 
(primitive_type) + (init_declarator + (MISSING identifier) + (number_literal)))))) From 5003064da71f46e169f45247d67b2813d11f93e5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 09:35:14 -0700 Subject: [PATCH 175/282] Make supertypes automatically hidden, without underscore prefix --- cli/src/generate/node_types.rs | 9 +-------- cli/src/generate/prepare_grammar/intern_symbols.rs | 6 ++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 7a5768a5bc..7962c7f33b 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -325,15 +325,8 @@ pub(crate) fn get_variable_info( } for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - if result[supertype_symbol.index].has_multi_step_production { + let variable = &syntax_grammar.variables[supertype_symbol.index]; return Err(Error::grammar(&format!( "Supertype symbols must always have a single visible child, but `{}` can have multiple", variable.name diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 7cd411ef6b..276f13ff0b 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -73,6 +73,12 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + for (i, variable) in variables.iter_mut().enumerate() { + if supertype_symbols.contains(&Symbol::non_terminal(i)) { + variable.kind = VariableType::Hidden; + } + } + Ok(InternedGrammar { variables, external_tokens, From 21c3bbc4b4bca92b1e79c45ebd7845b92c422ab8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 10:55:31 -0700 Subject: [PATCH 176/282] 
Account for supertypes during query analysis --- cli/src/tests/query_test.rs | 18 ++++++++++++++++++ lib/src/query.c | 12 +++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index b9b2811754..598a97920b 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -291,6 +291,24 @@ fn test_query_errors_on_impossible_patterns() { .join("\n") )) ); + + Query::new( + js_lang, + "(if_statement + condition: (parenthesized_expression (_expression) @cond))", + ) + .unwrap(); + assert_eq!( + Query::new(js_lang, "(if_statement condition: (_expression))",), + Err(QueryError::Structure( + 1, + [ + "(if_statement condition: (_expression))", // + " ^", + ] + .join("\n") + )) + ); }); } diff --git a/lib/src/query.c b/lib/src/query.c index 0ca03782b3..288656ac1b 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -639,6 +639,13 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { return &self->stack[self->depth - 1]; } +static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].parent_symbol == symbol) return true; + } + return false; +} + /*********************** * AnalysisSubgraphNode ***********************/ @@ -1134,6 +1141,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (step->field && step->field != field_id) { does_match = false; } + if (step->supertype_symbol) { + if (!analysis_state__has_supertype(state, step->supertype_symbol)) does_match = false; + } } // If this is a hidden child, then push a new entry to the stack, in order to @@ -1673,7 +1683,7 @@ static TSQueryError ts_query__parse_pattern( if (ts_language_symbol_metadata(self->language, symbol).supertype) { QueryStep *step = array_back(&self->steps); step->supertype_symbol = step->symbol; - step->symbol = WILDCARD_SYMBOL; + step->symbol = 
NAMED_WILDCARD_SYMBOL; } stream_skip_whitespace(stream); From a544200a6c9f012e4c111c83b46619ccf572523a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 11:47:47 -0700 Subject: [PATCH 177/282] Simplify query code for matching supertypes --- lib/src/query.c | 45 ++++++++-------- lib/src/tree_cursor.c | 120 ++++++++++++++++-------------------------- lib/src/tree_cursor.h | 11 +++- 3 files changed, 77 insertions(+), 99 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 288656ac1b..8c86badbed 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -640,10 +640,10 @@ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { } static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { - for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].parent_symbol == symbol) return true; - } - return false; + for (unsigned i = 0; i < self->depth; i++) { + if (self->stack[i].parent_symbol == symbol) return true; + } + return false; } /*********************** @@ -1141,9 +1141,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (step->field && step->field != field_id) { does_match = false; } - if (step->supertype_symbol) { - if (!analysis_state__has_supertype(state, step->supertype_symbol)) does_match = false; - } + if ( + step->supertype_symbol && + !analysis_state__has_supertype(state, step->supertype_symbol) + ) does_match = false; } // If this is a hidden child, then push a new entry to the stack, in order to @@ -2594,11 +2595,17 @@ static inline bool ts_query_cursor__advance( bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; - TSFieldId field_id = ts_tree_cursor_current_status( + TSFieldId field_id = 0; + TSSymbol supertypes[8] = {0}; + unsigned supertype_count = 8; + ts_tree_cursor_current_status( &self->cursor, + &field_id, &has_later_siblings, &has_later_named_siblings, - 
&can_have_later_siblings_with_this_field + &can_have_later_siblings_with_this_field, + supertypes, + &supertype_count ); LOG( "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", @@ -2617,6 +2624,7 @@ static inline bool ts_query_cursor__advance( // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; + if (step->supertype_symbol && !supertype_count) continue; ts_query_cursor__add_state(self, pattern); } @@ -2665,19 +2673,14 @@ static inline bool ts_query_cursor__advance( node_does_match = false; } if (step->supertype_symbol) { - bool has_supertype = ts_tree_cursor_has_supertype(&self->cursor, step->supertype_symbol); - - if (symbol == 1) { - LOG( - " has supertype %s: %d", - ts_language_symbol_name(self->query->language, step->supertype_symbol), - has_supertype - ); - } - - if (!has_supertype) { - node_does_match = false; + bool has_supertype = false; + for (unsigned j = 0; j < supertype_count; j++) { + if (supertypes[j] == step->supertype_symbol) { + has_supertype = true; + break; + } } + if (!has_supertype) node_does_match = false; } if (step->field) { if (step->field == field_id) { diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 8ef17960aa..64e8b41423 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -246,14 +246,19 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { // Private - Get various facts about the current node that are needed // when executing tree queries. 
-TSFieldId ts_tree_cursor_current_status( +void ts_tree_cursor_current_status( const TSTreeCursor *_self, + TSFieldId *field_id, bool *has_later_siblings, bool *has_later_named_siblings, - bool *can_have_later_siblings_with_this_field + bool *can_have_later_siblings_with_this_field, + TSSymbol *supertypes, + unsigned *supertype_count ) { const TreeCursor *self = (const TreeCursor *)_self; - TSFieldId result = 0; + unsigned max_supertypes = *supertype_count; + *field_id = 0; + *supertype_count = 0; *has_later_siblings = false; *has_later_named_siblings = false; *can_have_later_siblings_with_this_field = false; @@ -269,24 +274,31 @@ TSFieldId ts_tree_cursor_current_status( parent_entry->subtree->ptr->production_id ); - // If the subtree is visible, return its public-facing symbol. - // Otherwise, return zero. - #define subtree_visible_symbol(subtree, structural_child_index) \ - (( \ - !ts_subtree_extra(subtree) && \ - alias_sequence && \ - alias_sequence[structural_child_index] \ - ) ? \ - alias_sequence[structural_child_index] : \ - ts_subtree_visible(subtree) ? \ - ts_subtree_symbol(subtree) : \ - 0) \ + #define subtree_symbol(subtree, structural_child_index) \ + (( \ + !ts_subtree_extra(subtree) && \ + alias_sequence && \ + alias_sequence[structural_child_index] \ + ) ? \ + alias_sequence[structural_child_index] : \ + ts_subtree_symbol(subtree)) // Stop walking up when a visible ancestor is found. 
- if ( - i != self->stack.size - 1 && - subtree_visible_symbol(*entry->subtree, entry->structural_child_index) - ) break; + TSSymbol entry_symbol = subtree_symbol( + *entry->subtree, + entry->structural_child_index + ); + TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( + self->tree->language, + entry_symbol + ); + if (i != self->stack.size - 1 && entry_metadata.visible) break; + + // Record any supertypes + if (entry_metadata.supertype && *supertype_count < max_supertypes) { + supertypes[*supertype_count] = entry_symbol; + (*supertype_count)++; + } // Determine if the current node has later siblings. if (!*has_later_siblings) { @@ -295,19 +307,21 @@ TSFieldId ts_tree_cursor_current_status( if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { Subtree sibling = parent_entry->subtree->ptr->children[j]; - if (ts_subtree_visible_child_count(sibling) > 0) { + TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( + self->tree->language, + subtree_symbol(sibling, structural_child_index) + ); + if (sibling_metadata.visible) { *has_later_siblings = true; if (*has_later_named_siblings) break; - if (sibling.ptr->named_child_count > 0) { + if (sibling_metadata.named) { *has_later_named_siblings = true; break; } - } - TSSymbol visible_symbol = subtree_visible_symbol(sibling, structural_child_index); - if (visible_symbol) { + } else if (ts_subtree_visible_child_count(sibling) > 0) { *has_later_siblings = true; if (*has_later_named_siblings) break; - if (ts_language_symbol_metadata(self->tree->language, visible_symbol).named) { + if (sibling.ptr->named_child_count > 0) { *has_later_named_siblings = true; break; } @@ -316,7 +330,7 @@ TSFieldId ts_tree_cursor_current_status( } } - #undef subtree_visible_symbol + #undef subtree_metadata if (!ts_subtree_extra(*entry->subtree)) { const TSFieldMapEntry *field_map, *field_map_end; @@ -327,10 +341,10 @@ TSFieldId 
ts_tree_cursor_current_status( ); // Look for a field name associated with the current node. - if (!result) { + if (!*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { if (!i->inherited && i->child_index == entry->structural_child_index) { - result = i->field_id; + *field_id = i->field_id; *can_have_later_siblings_with_this_field = false; break; } @@ -338,9 +352,9 @@ TSFieldId ts_tree_cursor_current_status( } // Determine if the current node can have later siblings with the same field name. - if (result) { + if (*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == result && i->child_index > entry->structural_child_index) { + if (i->field_id == *field_id && i->child_index > entry->structural_child_index) { *can_have_later_siblings_with_this_field = true; break; } @@ -348,52 +362,6 @@ TSFieldId ts_tree_cursor_current_status( } } } - - return result; -} - -bool ts_tree_cursor_has_supertype( - const TSTreeCursor *_self, - TSSymbol supertype_symbol -) { - const TreeCursor *self = (const TreeCursor *)_self; - - // Walk up the tree, visiting the current node and its invisible ancestors, - // because fields can refer to nodes through invisible *wrapper* nodes, - for (unsigned i = self->stack.size - 1; i > 0; i--) { - TreeCursorEntry *entry = &self->stack.contents[i]; - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->production_id - ); - - // If the subtree is visible, return its public-facing symbol. - // Otherwise, return zero. - #define subtree_visible_symbol(subtree, structural_child_index) \ - (( \ - !ts_subtree_extra(subtree) && \ - alias_sequence && \ - alias_sequence[structural_child_index] \ - ) ? \ - alias_sequence[structural_child_index] : \ - ts_subtree_visible(subtree) ? 
\ - ts_subtree_symbol(subtree) : \ - 0) \ - - // Stop walking up when a visible ancestor is found. - if ( - i != self->stack.size - 1 && - subtree_visible_symbol(*entry->subtree, entry->structural_child_index) - ) break; - - if (ts_subtree_symbol(*entry->subtree) == supertype_symbol) { - return true; - } - } - - return false; } TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 7829e8b94f..7c9c05d582 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -16,7 +16,14 @@ typedef struct { } TreeCursor; void ts_tree_cursor_init(TreeCursor *, TSNode); -TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *, bool *); -bool ts_tree_cursor_has_supertype(const TSTreeCursor *, TSSymbol); +void ts_tree_cursor_current_status( + const TSTreeCursor *, + TSFieldId *, + bool *, + bool *, + bool *, + TSSymbol *, + unsigned * +); #endif // TREE_SITTER_TREE_CURSOR_H_ From cb343cad5e8fe103c80d59df38d83b79f300596c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 12:59:24 -0700 Subject: [PATCH 178/282] Avoid reusing the root node during incremental parsing Fixes #712 --- cli/src/tests/parser_test.rs | 12 ++++++++++++ lib/src/reusable_node.h | 25 ++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 392d1a9ded..b2b2560e4d 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -395,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() { assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); } +#[test] +fn test_parsing_empty_file_with_reused_tree() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser.parse("", None); + parser.parse("", tree.as_ref()); + + let tree = parser.parse("\n ", None); + parser.parse("\n ", tree.as_ref()); +} + // Thread safety #[test] diff 
--git a/lib/src/reusable_node.h b/lib/src/reusable_node.h index 9cba951909..e5ccaa2a28 100644 --- a/lib/src/reusable_node.h +++ b/lib/src/reusable_node.h @@ -20,15 +20,6 @@ static inline void reusable_node_clear(ReusableNode *self) { self->last_external_token = NULL_SUBTREE; } -static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { - reusable_node_clear(self); - array_push(&self->stack, ((StackEntry) { - .tree = tree, - .child_index = 0, - .byte_offset = 0, - })); -} - static inline Subtree reusable_node_tree(ReusableNode *self) { return self->stack.size > 0 ? self->stack.contents[self->stack.size - 1].tree @@ -86,3 +77,19 @@ static inline void reusable_node_advance_past_leaf(ReusableNode *self) { while (reusable_node_descend(self)) {} reusable_node_advance(self); } + +static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { + reusable_node_clear(self); + array_push(&self->stack, ((StackEntry) { + .tree = tree, + .child_index = 0, + .byte_offset = 0, + })); + + // Never reuse the root node, because it has a non-standard internal structure + // due to transformations that are applied when it is accepted: adding the EOF + // child and any extra children. + if (!reusable_node_descend(self)) { + reusable_node_clear(self); + } +} From ffd3bdc4c1c297bae58c5608fdbaf2f6448252b1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 13:06:06 -0700 Subject: [PATCH 179/282] Escape ? in C string literals Fixes #714 --- cli/src/generate/render.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f33539d690..cf3109c8a9 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1509,6 +1509,7 @@ impl Generator { for c in name.chars() { match c { '\"' => result += "\\\"", + '?' 
=> result += "\\?", '\\' => result += "\\\\", '\u{000c}' => result += "\\f", '\n' => result += "\\n", From b6fba7ca4c32207fa9b387b594a8da2ff66ee4be Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 13:09:19 -0700 Subject: [PATCH 180/282] 0.17.0 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 117ac49e6b..21277ff978 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.16.9" +version = "0.17.0" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 52a2ed6b9f..bd065b7134 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.9" +version = "0.17.0" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 01afe1075f..8ad062d898 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.9", + "version": "0.17.0", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 9185262e48ed67b92bf63a029839bae0758d7429 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 13:17:35 -0700 Subject: [PATCH 181/282] web: 0.17.0 --- lib/binding_web/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 9c93dac689..463659fef6 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.16.4", + "version": "0.17.0", "description": "Tree-sitter bindings for the web", 
"main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", From 297e2bcb288e9aa5fac3d68558d9e9c31a92ebbb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Sep 2020 16:55:48 -0700 Subject: [PATCH 182/282] static query analysis: Fix handling of fields in hidden nodes --- cli/src/tests/query_test.rs | 14 +++++++++++--- lib/src/query.c | 2 +- script/fetch-fixtures | 1 + script/fetch-fixtures.cmd | 1 + 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 598a97920b..5fd07680e7 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1419,15 +1419,15 @@ fn test_query_matches_with_supertypes() { let query = Query::new( language, r#" - (argument_list (_expression) @arg) + (argument_list (expression) @arg) (keyword_argument - value: (_expression) @kw_arg) + value: (expression) @kw_arg) (assignment left: (left_hand_side (identifier) @var_def)) - (_primary_expression/identifier) @var_ref + (primary_expression/identifier) @var_ref "#, ) .unwrap(); @@ -2704,6 +2704,14 @@ fn test_query_step_is_definite() { ("\"}\"", false), ], }, + Row { + description: "hidden nodes that have several fields", + language: get_language("java"), + pattern: r#" + (method_declaration name: (identifier)) + "#, + results_by_substring: &[("name:", true)], + }, ]; allocations::record(|| { diff --git a/lib/src/query.c b/lib/src/query.c index 8c86badbed..b6e4895174 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1116,7 +1116,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); for (; field_map != field_map_end; field_map++) { - if (field_map->child_index == child_index) { + if (!field_map->inherited && field_map->child_index == child_index) { field_id = field_map->field_id; break; } diff --git a/script/fetch-fixtures 
b/script/fetch-fixtures index 96cee76e8e..1eec16eea0 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -27,6 +27,7 @@ fetch_grammar cpp master fetch_grammar embedded-template master fetch_grammar go master fetch_grammar html master +fetch_grammar java master fetch_grammar javascript master fetch_grammar jsdoc master fetch_grammar json master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 4b3bb56247..32727b0c48 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -6,6 +6,7 @@ call:fetch_grammar cpp master call:fetch_grammar embedded-template master call:fetch_grammar go master call:fetch_grammar html master +call:fetch_grammar java master call:fetch_grammar javascript master call:fetch_grammar jsdoc master call:fetch_grammar json master From 518916f2211373658cf148c9618e588396bd0a12 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 13:47:27 -0700 Subject: [PATCH 183/282] Return correct path and line in query errors from the CLI --- cli/src/error.rs | 38 ++--- cli/src/loader.rs | 79 +++++++-- cli/src/test.rs | 8 +- cli/src/tests/query_test.rs | 321 +++++++++++++++++++++--------------- lib/binding_rust/lib.rs | 160 ++++++++++++------ 5 files changed, 372 insertions(+), 234 deletions(-) diff --git a/cli/src/error.rs b/cli/src/error.rs index 075de3a6ab..63b57c9e42 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,7 +1,7 @@ use super::test_highlight; use std::fmt::Write; use std::io; -use tree_sitter::QueryError; +use tree_sitter::{QueryError, QueryErrorKind}; #[derive(Debug)] pub struct Error(pub Vec); @@ -51,31 +51,19 @@ impl Error { } } -impl<'a> From for Error { - fn from(error: QueryError) -> Self { - match error { - QueryError::Capture(row, c) => Error::new(format!( - "Query error on line {}: Invalid capture name {}", - row, c - )), - QueryError::Field(row, f) => Error::new(format!( - "Query error on line {}: Invalid field name {}", - row, f - )), - QueryError::NodeType(row, t) 
=> Error::new(format!( - "Query error on line {}. Invalid node type {}", - row, t - )), - QueryError::Syntax(row, l) => Error::new(format!( - "Query error on line {}. Invalid syntax:\n{}", - row, l - )), - QueryError::Structure(row, l) => Error::new(format!( - "Query error on line {}. Impossible pattern:\n{}", - row, l - )), - QueryError::Predicate(p) => Error::new(format!("Query error: {}", p)), +impl<'a> From<(&str, QueryError)> for Error { + fn from((path, error): (&str, QueryError)) -> Self { + let mut msg = format!("Query error at {}:{}. ", path, error.row + 1); + match error.kind { + QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message), + QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message), + QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message), + QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message), + QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message), + QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message), } + .unwrap(); + Self::new(msg) } } diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 62cc9b62e5..3d026f219e 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -5,12 +5,13 @@ use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; use std::io::BufReader; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Mutex; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::Language; +use tree_sitter::{Language, QueryError}; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::TagsConfiguration; @@ -543,13 +544,32 @@ impl Loader { impl<'a> LanguageConfiguration<'a> { pub fn highlight_config(&self, language: Language) -> Result> { + fn include_path_in_error<'a>( + mut error: QueryError, + ranges: &'a Vec<(String, Range)>, + source: &str, + 
start_offset: usize, + ) -> (&'a str, QueryError) { + let offset = error.offset - start_offset; + let (path, range) = ranges + .iter() + .find(|(_, range)| range.contains(&offset)) + .unwrap(); + error.row = source[range.start..offset] + .chars() + .filter(|c| *c == '\n') + .count(); + (path.as_ref(), error) + } + self.highlight_config .get_or_try_init(|| { - let highlights_query = + let (highlights_query, highlight_ranges) = self.read_queries(&self.highlights_filenames, "highlights.scm")?; - let injections_query = + let (injections_query, injection_ranges) = self.read_queries(&self.injections_filenames, "injections.scm")?; - let locals_query = self.read_queries(&self.locals_filenames, "locals.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; if highlights_query.is_empty() { Ok(None) @@ -560,9 +580,25 @@ impl<'a> LanguageConfiguration<'a> { &injections_query, &locals_query, ) - .map_err(Error::wrap(|| { - format!("Failed to load queries in {:?}", self.root_path) - }))?; + .map_err(|error| { + if error.offset < injections_query.len() { + include_path_in_error(error, &injection_ranges, &injections_query, 0) + } else if error.offset < injections_query.len() + locals_query.len() { + include_path_in_error( + error, + &locals_ranges, + &locals_query, + injections_query.len(), + ) + } else { + include_path_in_error( + error, + &highlight_ranges, + &highlights_query, + injections_query.len() + locals_query.len(), + ) + } + })?; let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { @@ -581,8 +617,8 @@ impl<'a> LanguageConfiguration<'a> { pub fn tags_config(&self, language: Language) -> Result> { self.tags_config .get_or_try_init(|| { - let tags_query = self.read_queries(&self.tags_filenames, "tags.scm")?; - let locals_query = self.read_queries(&self.locals_filenames, "locals.scm")?; + let (tags_query, _) = 
self.read_queries(&self.tags_filenames, "tags.scm")?; + let (locals_query, _) = self.read_queries(&self.locals_filenames, "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { @@ -596,27 +632,34 @@ impl<'a> LanguageConfiguration<'a> { .map(Option::as_ref) } - fn read_queries(&self, paths: &Option>, default_path: &str) -> Result { + fn read_queries( + &self, + paths: &Option>, + default_path: &str, + ) -> Result<(String, Vec<(String, Range)>)> { + let mut query = String::new(); + let mut path_ranges = Vec::new(); if let Some(paths) = paths.as_ref() { - let mut query = String::new(); for path in paths { - let path = self.root_path.join(path); - query += &fs::read_to_string(&path).map_err(Error::wrap(|| { + let abs_path = self.root_path.join(path); + let prev_query_len = query.len(); + query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| { format!("Failed to read query file {:?}", path) }))?; + path_ranges.push((path.clone(), prev_query_len..query.len())); } - Ok(query) } else { let queries_path = self.root_path.join("queries"); let path = queries_path.join(default_path); if path.exists() { - fs::read_to_string(&path).map_err(Error::wrap(|| { + query = fs::read_to_string(&path).map_err(Error::wrap(|| { format!("Failed to read query file {:?}", path) - })) - } else { - Ok(String::new()) + }))?; + path_ranges.push((default_path.to_string(), 0..query.len())); } } + + Ok((query, path_ranges)) } } diff --git a/cli/src/test.rs b/cli/src/test.rs index 1806c15020..7c143ecd71 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -102,14 +102,14 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { if path.exists() { for entry in fs::read_dir(path)? 
{ let entry = entry?; - let hidden = entry.file_name().to_str().unwrap_or("").starts_with("."); + let filepath = entry.file_name(); + let filepath = filepath.to_str().unwrap_or(""); + let hidden = filepath.starts_with("."); if !hidden { let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| { format!("Error reading query file {:?}", entry.file_name()) }))?; - Query::new(language, &content).map_err(Error::wrap(|| { - format!("Error in query file {:?}", entry.file_name()) - }))?; + Query::new(language, &content).map_err(|e| (filepath, e))?; } } } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 5fd07680e7..2b816bbcd3 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -4,8 +4,8 @@ use lazy_static::lazy_static; use std::env; use std::fmt::Write; use tree_sitter::{ - Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryMatch, - QueryPredicate, QueryPredicateArg, QueryProperty, + Language, Node, Parser, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, + QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, }; lazy_static! { @@ -26,109 +26,98 @@ fn test_query_errors_on_invalid_syntax() { // Mismatched parens assert_eq!( - Query::new(language, "(if_statement"), - Err(QueryError::Syntax( - 1, - [ - "(if_statement", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement").unwrap_err().message, + [ + "(if_statement", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, "; comment 1\n; comment 2\n (if_statement))"), - Err(QueryError::Syntax( - 3, - [ - " (if_statement))", // - " ^", - ] - .join("\n") - )) + Query::new(language, "; comment 1\n; comment 2\n (if_statement))") + .unwrap_err() + .message, + [ + " (if_statement))", // + " ^", + ] + .join("\n") ); // Return an error at the *beginning* of a bare identifier not followed a colon. // If there's a colon but no pattern, return an error at the end of the colon. 
assert_eq!( - Query::new(language, "(if_statement identifier)"), - Err(QueryError::Syntax( - 1, - [ - "(if_statement identifier)", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement identifier)") + .unwrap_err() + .message, + [ + "(if_statement identifier)", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, "(if_statement condition:)"), - Err(QueryError::Syntax( - 1, - [ - "(if_statement condition:)", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement condition:)") + .unwrap_err() + .message, + [ + "(if_statement condition:)", // + " ^", + ] + .join("\n") ); // Return an error at the beginning of an unterminated string. assert_eq!( - Query::new(language, r#"(identifier) "h "#), - Err(QueryError::Syntax( - 1, - [ - r#"(identifier) "h "#, // - r#" ^"#, - ] - .join("\n") - )) + Query::new(language, r#"(identifier) "h "#) + .unwrap_err() + .message, + [ + r#"(identifier) "h "#, // + r#" ^"#, + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) ()"#), - Err(QueryError::Syntax( - 1, - [ - "((identifier) ()", // - " ^", - ] - .join("\n") - )) + Query::new(language, r#"((identifier) ()"#) + .unwrap_err() + .message, + [ + "((identifier) ()", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) [])"#), - Err(QueryError::Syntax( - 1, - [ - "((identifier) [])", // - " ^", - ] - .join("\n") - )) + Query::new(language, r#"((identifier) [])"#) + .unwrap_err() + .message, + [ + "((identifier) [])", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) (#a)"#), - Err(QueryError::Syntax( - 1, - [ - "((identifier) (#a)", // - " ^", - ] - .join("\n") - )) + Query::new(language, r#"((identifier) (#a)"#) + .unwrap_err() + .message, + [ + "((identifier) (#a)", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) @x (#eq? @x a"#), - Err(QueryError::Syntax( - 1, - [ - r#"((identifier) @x (#eq? 
@x a"#, - r#" ^"#, - ] - .join("\n") - )) + Query::new(language, r#"((identifier) @x (#eq? @x a"#) + .unwrap_err() + .message, + [ + r#"((identifier) @x (#eq? @x a"#, + r#" ^"#, + ] + .join("\n") ); }); } @@ -139,53 +128,97 @@ fn test_query_errors_on_invalid_symbols() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "(clas)"), - Err(QueryError::NodeType(1, "clas".to_string())) + Query::new(language, "(clas)").unwrap_err(), + QueryError { + row: 1, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } ); assert_eq!( - Query::new(language, "(if_statement (arrayyyyy))"), - Err(QueryError::NodeType(1, "arrayyyyy".to_string())) + Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + QueryError { + row: 1, + offset: 15, + column: 15, + kind: QueryErrorKind::NodeType, + message: "arrayyyyy".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement condition: (non_existent3))"), - Err(QueryError::NodeType(1, "non_existent3".to_string())) + Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + QueryError { + row: 1, + offset: 26, + column: 26, + kind: QueryErrorKind::NodeType, + message: "non_existent3".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement condit: (identifier))"), - Err(QueryError::Field(1, "condit".to_string())) + Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + QueryError { + row: 1, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "condit".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement conditioning: (identifier))"), - Err(QueryError::Field(1, "conditioning".to_string())) + Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + QueryError { + row: 1, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "conditioning".to_string() + } ); }); } #[test] -fn test_query_errors_on_invalid_conditions() { +fn 
test_query_errors_on_invalid_predicates() { allocations::record(|| { let language = get_language("javascript"); assert_eq!( - Query::new(language, "((identifier) @id (@id))"), - Err(QueryError::Syntax( - 1, - [ + Query::new(language, "((identifier) @id (@id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Syntax, + row: 1, + column: 19, + offset: 19, + message: [ "((identifier) @id (@id))", // " ^" ] .join("\n") - )) + } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id))"), - Err(QueryError::Predicate( - "Wrong number of arguments to #eq? predicate. Expected 2, got 1.".to_string() - )) + Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Predicate, + row: 0, + column: 0, + offset: 0, + message: "Wrong number of arguments to #eq? predicate. Expected 2, got 1." + .to_string() + } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id @ok))"), - Err(QueryError::Capture(1, "ok".to_string())) + Query::new(language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Capture, + row: 1, + column: 29, + offset: 29, + message: "ok".to_string(), + } ); }); } @@ -201,14 +234,17 @@ fn test_query_errors_on_impossible_patterns() { js_lang, "(binary_expression left: (identifier) left: (identifier))" ), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 1, + offset: 38, + column: 38, + message: [ "(binary_expression left: (identifier) left: (identifier))", " ^" ] .join("\n"), - )) + }) ); Query::new( @@ -218,27 +254,33 @@ fn test_query_errors_on_impossible_patterns() { .unwrap(); assert_eq!( Query::new(js_lang, "(function_declaration name: (statement_block))"), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 1, + offset: 22, + column: 22, + message: [ "(function_declaration name: (statement_block))", " ^", ] .join("\n") - )) + }) ); Query::new(rb_lang, "(call receiver:(call))").unwrap(); assert_eq!( Query::new(rb_lang, "(call receiver:(binary))"), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 1, + offset: 6, + column: 6, + message: [ "(call receiver:(binary))", // " ^", ] .join("\n") - )) + }) ); Query::new( @@ -259,37 +301,46 @@ fn test_query_errors_on_impossible_patterns() { (generator_function_declaration (identifier)) ]", ), - Err(QueryError::Structure( - 3, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 3, + offset: 88, + column: 42, + message: [ " (function_declaration (object))", // " ^", ] .join("\n") - )) + }) ); assert_eq!( Query::new(js_lang, "(identifier (identifier))",), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 1, + offset: 12, + column: 12, + message: [ "(identifier (identifier))", // " ^", ] .join("\n") - )) + }) ); assert_eq!( Query::new(js_lang, "(true (true))",), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: 
QueryErrorKind::Structure, + row: 1, + offset: 6, + column: 6, + message: [ "(true (true))", // " ^", ] .join("\n") - )) + }) ); Query::new( @@ -298,16 +349,20 @@ fn test_query_errors_on_impossible_patterns() { condition: (parenthesized_expression (_expression) @cond))", ) .unwrap(); + assert_eq!( Query::new(js_lang, "(if_statement condition: (_expression))",), - Err(QueryError::Structure( - 1, - [ + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 1, + offset: 14, + column: 14, + message: [ "(if_statement condition: (_expression))", // " ^", ] .join("\n") - )) + }) ); }); } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index ea5893b426..75ed361fd3 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -157,13 +157,22 @@ pub struct IncludedRangesError(pub usize); /// An error that occurred when trying to create a `Query`. #[derive(Debug, PartialEq, Eq)] -pub enum QueryError { - Syntax(usize, String), - NodeType(usize, String), - Field(usize, String), - Capture(usize, String), - Predicate(String), - Structure(usize, String), +pub struct QueryError { + pub row: usize, + pub column: usize, + pub offset: usize, + pub message: String, + pub kind: QueryErrorKind, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum QueryErrorKind { + Syntax, + NodeType, + Field, + Capture, + Predicate, + Structure, } #[derive(Debug)] @@ -1175,8 +1184,11 @@ impl Query { None } }); + let column = offset - line_start; - return match error_type { + let kind; + let message; + match error_type { // Error types that report names ffi::TSQueryError_TSQueryErrorNodeType | ffi::TSQueryError_TSQueryErrorField @@ -1185,34 +1197,36 @@ impl Query { let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') .unwrap_or(source.len()); - let name = suffix.split_at(end_offset).0.to_string(); - match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => { - Err(QueryError::NodeType(row, name)) - } - ffi::TSQueryError_TSQueryErrorField => 
Err(QueryError::Field(row, name)), - ffi::TSQueryError_TSQueryErrorCapture => { - Err(QueryError::Capture(row, name)) - } + message = suffix.split_at(end_offset).0.to_string(); + kind = match error_type { + ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, + ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, + ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, _ => unreachable!(), - } + }; } // Error types that report positions _ => { - let message = if let Some(line) = line_containing_error { + message = if let Some(line) = line_containing_error { line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" } else { "Unexpected EOF".to_string() }; - match error_type { - ffi::TSQueryError_TSQueryErrorStructure => { - Err(QueryError::Structure(row, message)) - } - _ => Err(QueryError::Syntax(row, message)), - } + kind = match error_type { + ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, + _ => QueryErrorKind::Syntax, + }; } }; + + return Err(QueryError { + row, + column, + offset, + kind, + message, + }); } let string_count = unsafe { ffi::ts_query_string_count(ptr) }; @@ -1261,6 +1275,13 @@ impl Query { slice::from_raw_parts(raw_predicates, length as usize) }; + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let row = source + .char_indices() + .take_while(|(i, _)| *i < byte_offset as usize) + .filter(|(_, c)| *c == '\n') + .count(); + let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; @@ -1275,10 +1296,13 @@ impl Query { } if p[0].type_ != type_string { - return Err(QueryError::Predicate(format!( - "Expected predicate to start with a function name. 
Got @{}.", - result.capture_names[p[0].value_id as usize], - ))); + return Err(predicate_error( + row, + format!( + "Expected predicate to start with a function name. Got @{}.", + result.capture_names[p[0].value_id as usize], + ), + )); } // Build a predicate for each of the known predicate function names. @@ -1286,13 +1310,16 @@ impl Query { match operator_name.as_str() { "eq?" | "not-eq?" => { if p.len() != 3 { - return Err(QueryError::Predicate(format!( + return Err(predicate_error( + row, + format!( "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", p.len() - 1 - ))); + ), + )); } if p[1].type_ != type_capture { - return Err(QueryError::Predicate(format!( + return Err(predicate_error(row, format!( "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); @@ -1316,19 +1343,19 @@ impl Query { "match?" | "not-match?" => { if p.len() != 3 { - return Err(QueryError::Predicate(format!( + return Err(predicate_error(row, format!( "Wrong number of arguments to #match? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { - return Err(QueryError::Predicate(format!( + return Err(predicate_error(row, format!( "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } if p[2].type_ == type_capture { - return Err(QueryError::Predicate(format!( + return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", result.capture_names[p[2].value_id as usize], ))); @@ -1339,14 +1366,15 @@ impl Query { text_predicates.push(TextPredicate::CaptureMatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { - QueryError::Predicate(format!("Invalid regex '{}'", regex)) + predicate_error(row, format!("Invalid regex '{}'", regex)) })?, is_positive, )); } "set!" 
=> property_settings.push(Self::parse_property( - "set!", + row, + &operator_name, &result.capture_names, &string_values, &p[1..], @@ -1354,6 +1382,7 @@ impl Query { "is?" | "is-not?" => property_predicates.push(( Self::parse_property( + row, &operator_name, &result.capture_names, &string_values, @@ -1476,17 +1505,21 @@ impl Query { } fn parse_property( + row: usize, function_name: &str, capture_names: &[String], string_values: &[String], args: &[ffi::TSQueryPredicateStep], ) -> Result { if args.len() == 0 || args.len() > 3 { - return Err(QueryError::Predicate(format!( - "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", - function_name, - args.len(), - ))); + return Err(predicate_error( + row, + format!( + "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", + function_name, + args.len(), + ), + )); } let mut capture_id = None; @@ -1496,10 +1529,13 @@ impl Query { for arg in args { if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { if capture_id.is_some() { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Unexpected second capture name @{}", - function_name, capture_names[arg.value_id as usize] - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. Unexpected second capture name @{}", + function_name, capture_names[arg.value_id as usize] + ), + )); } capture_id = Some(arg.value_id as usize); } else if key.is_none() { @@ -1507,20 +1543,26 @@ impl Query { } else if value.is_none() { value = Some(string_values[arg.value_id as usize].as_str()); } else { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Unexpected third argument @{}", - function_name, string_values[arg.value_id as usize] - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. 
Unexpected third argument @{}", + function_name, string_values[arg.value_id as usize] + ), + )); } } if let Some(key) = key { Ok(QueryProperty::new(key, value, capture_id)) } else { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Missing key argument", - function_name, - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. Missing key argument", + function_name, + ), + )); } } } @@ -1770,6 +1812,16 @@ impl<'a> Into for &'a InputEdit { } } +fn predicate_error(row: usize, message: String) -> QueryError { + QueryError { + kind: QueryErrorKind::Predicate, + row, + column: 0, + offset: 0, + message, + } +} + unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} From ba239ce4ab6066d8a8dfc6bba8ce8886d54ab391 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:03:51 -0700 Subject: [PATCH 184/282] Make query error line numbers consistently display 1-indexed --- cli/src/loader.rs | 88 ++++++++++++++++++++++++------------- cli/src/tags.rs | 7 ++- cli/src/tests/query_test.rs | 28 ++++++------ lib/binding_rust/lib.rs | 14 +++--- tags/src/lib.rs | 33 +++++++------- 5 files changed, 102 insertions(+), 68 deletions(-) diff --git a/cli/src/loader.rs b/cli/src/loader.rs index 3d026f219e..3d5a937700 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -13,7 +13,7 @@ use std::time::SystemTime; use std::{fs, mem}; use tree_sitter::{Language, QueryError}; use tree_sitter_highlight::HighlightConfiguration; -use tree_sitter_tags::TagsConfiguration; +use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -544,25 +544,8 @@ impl Loader { impl<'a> LanguageConfiguration<'a> { pub fn highlight_config(&self, language: Language) -> Result> { - fn include_path_in_error<'a>( - mut error: QueryError, - ranges: &'a Vec<(String, Range)>, - source: &str, - start_offset: usize, - ) -> (&'a str, 
QueryError) { - let offset = error.offset - start_offset; - let (path, range) = ranges - .iter() - .find(|(_, range)| range.contains(&offset)) - .unwrap(); - error.row = source[range.start..offset] - .chars() - .filter(|c| *c == '\n') - .count(); - (path.as_ref(), error) - } - - self.highlight_config + return self + .highlight_config .get_or_try_init(|| { let (highlights_query, highlight_ranges) = self.read_queries(&self.highlights_filenames, "highlights.scm")?; @@ -582,16 +565,21 @@ impl<'a> LanguageConfiguration<'a> { ) .map_err(|error| { if error.offset < injections_query.len() { - include_path_in_error(error, &injection_ranges, &injections_query, 0) + Self::include_path_in_query_error( + error, + &injection_ranges, + &injections_query, + 0, + ) } else if error.offset < injections_query.len() + locals_query.len() { - include_path_in_error( + Self::include_path_in_query_error( error, &locals_ranges, &locals_query, injections_query.len(), ) } else { - include_path_in_error( + Self::include_path_in_query_error( error, &highlight_ranges, &highlights_query, @@ -611,27 +599,67 @@ impl<'a> LanguageConfiguration<'a> { Ok(Some(result)) } }) - .map(Option::as_ref) + .map(Option::as_ref); } pub fn tags_config(&self, language: Language) -> Result> { self.tags_config .get_or_try_init(|| { - let (tags_query, _) = self.read_queries(&self.tags_filenames, "tags.scm")?; - let (locals_query, _) = self.read_queries(&self.locals_filenames, "locals.scm")?; + let (tags_query, tags_ranges) = + self.read_queries(&self.tags_filenames, "tags.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { TagsConfiguration::new(language, &tags_query, &locals_query) - .map_err(Error::wrap(|| { - format!("Failed to load queries in {:?}", self.root_path) - })) - .map(|config| Some(config)) + .map(Some) + .map_err(|error| { + if let TagsError::Query(error) = error { + if error.offset < locals_query.len() { + 
Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + 0, + ) + } else { + Self::include_path_in_query_error( + error, + &tags_ranges, + &tags_query, + locals_query.len(), + ) + } + .into() + } else { + error.into() + } + }) } }) .map(Option::as_ref) } + fn include_path_in_query_error<'b>( + mut error: QueryError, + ranges: &'b Vec<(String, Range)>, + source: &str, + start_offset: usize, + ) -> (&'b str, QueryError) { + let offset_within_section = error.offset - start_offset; + let (path, range) = ranges + .iter() + .find(|(_, range)| range.contains(&offset_within_section)) + .unwrap(); + error.offset = offset_within_section - range.start; + error.row = source[range.start..offset_within_section] + .chars() + .filter(|c| *c == '\n') + .count(); + (path.as_ref(), error) + } + fn read_queries( &self, paths: &Option>, diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 122b58d252..802d8d0654 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -53,7 +53,10 @@ pub fn generate_tags( let source = fs::read(path)?; let t0 = Instant::now(); - for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))?.0 { + for tag in context + .generate_tags(tags_config, &source, Some(&cancellation_flag))? 
+ .0 + { let tag = tag?; if !quiet { write!( @@ -69,7 +72,7 @@ pub fn generate_tags( )?; if let Some(docs) = tag.docs { if docs.len() > 120 { - write!(&mut stdout, "\t{:?}...", &docs[0..120])?; + write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?; } else { write!(&mut stdout, "\t{:?}", &docs)?; } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 2b816bbcd3..efdaf78059 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -130,7 +130,7 @@ fn test_query_errors_on_invalid_symbols() { assert_eq!( Query::new(language, "(clas)").unwrap_err(), QueryError { - row: 1, + row: 0, offset: 1, column: 1, kind: QueryErrorKind::NodeType, @@ -140,7 +140,7 @@ fn test_query_errors_on_invalid_symbols() { assert_eq!( Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), QueryError { - row: 1, + row: 0, offset: 15, column: 15, kind: QueryErrorKind::NodeType, @@ -150,7 +150,7 @@ fn test_query_errors_on_invalid_symbols() { assert_eq!( Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), QueryError { - row: 1, + row: 0, offset: 26, column: 26, kind: QueryErrorKind::NodeType, @@ -160,7 +160,7 @@ fn test_query_errors_on_invalid_symbols() { assert_eq!( Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), QueryError { - row: 1, + row: 0, offset: 14, column: 14, kind: QueryErrorKind::Field, @@ -170,7 +170,7 @@ fn test_query_errors_on_invalid_symbols() { assert_eq!( Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), QueryError { - row: 1, + row: 0, offset: 14, column: 14, kind: QueryErrorKind::Field, @@ -189,7 +189,7 @@ fn test_query_errors_on_invalid_predicates() { Query::new(language, "((identifier) @id (@id))").unwrap_err(), QueryError { kind: QueryErrorKind::Syntax, - row: 1, + row: 0, column: 19, offset: 19, message: [ @@ -214,7 +214,7 @@ fn test_query_errors_on_invalid_predicates() { Query::new(language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), QueryError { kind: QueryErrorKind::Capture, - row: 1, + row: 0, column: 29, offset: 29, message: "ok".to_string(), @@ -236,7 +236,7 @@ fn test_query_errors_on_impossible_patterns() { ), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 38, column: 38, message: [ @@ -256,7 +256,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(js_lang, "(function_declaration name: (statement_block))"), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 22, column: 22, message: [ @@ -272,7 +272,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(rb_lang, "(call receiver:(binary))"), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 6, column: 6, message: [ @@ -303,7 +303,7 @@ fn test_query_errors_on_impossible_patterns() { ), Err(QueryError { kind: QueryErrorKind::Structure, - row: 3, + row: 2, offset: 88, column: 42, message: [ @@ -318,7 +318,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(js_lang, "(identifier (identifier))",), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 12, column: 12, message: [ @@ -332,7 +332,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(js_lang, "(true (true))",), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 6, column: 6, message: [ @@ -354,7 +354,7 @@ fn test_query_errors_on_impossible_patterns() { Query::new(js_lang, "(if_statement condition: (_expression))",), Err(QueryError { kind: QueryErrorKind::Structure, - row: 1, + row: 0, offset: 14, column: 14, message: [ diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 75ed361fd3..97d10d13f8 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1174,16 +1174,16 @@ impl Query { let offset = error_offset as usize; let mut line_start = 0; let mut row = 0; - let line_containing_error = source.split("\n").find_map(|line| { - row += 1; + let mut 
line_containing_error = None; + for line in source.split("\n") { let line_end = line_start + line.len() + 1; if line_end > offset { - Some(line) - } else { - line_start = line_end; - None + line_containing_error = Some(line); + break; } - }); + line_start = line_end; + row += 1; + } let column = offset - line_start; let kind; diff --git a/tags/src/lib.rs b/tags/src/lib.rs index dd55d4bea6..576b04f8d7 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -271,21 +271,24 @@ impl TagsContext { .matches(&config.query, tree_ref.root_node(), move |node| { &source[node.byte_range()] }); - Ok((TagsIter { - _tree: tree, - matches, - source, - config, - cancellation_flag, - prev_line_info: None, - tag_queue: Vec::new(), - iter_count: 0, - scopes: vec![LocalScope { - range: 0..source.len(), - inherits: false, - local_defs: Vec::new(), - }], - }, tree_ref.root_node().has_error())) + Ok(( + TagsIter { + _tree: tree, + matches, + source, + config, + cancellation_flag, + prev_line_info: None, + tag_queue: Vec::new(), + iter_count: 0, + scopes: vec![LocalScope { + range: 0..source.len(), + inherits: false, + local_defs: Vec::new(), + }], + }, + tree_ref.root_node().has_error(), + )) } } From 7361cf9a0b38621c6fc1a48e52a2a2b73c452c29 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:05:34 -0700 Subject: [PATCH 185/282] rust binding: 0.17.0 --- Cargo.lock | 8 ++++---- lib/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21277ff978..39ff6a52f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.16.1" +version = "0.17.0" dependencies = [ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -762,7 +762,7 @@ dependencies = [ "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.0.7 
(registry+https://github.com/rust-lang/crates.io-index)", "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.1", + "tree-sitter 0.17.0", "tree-sitter-highlight 0.2.0", "tree-sitter-tags 0.2.0", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -773,7 +773,7 @@ name = "tree-sitter-highlight" version = "0.2.0" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.1", + "tree-sitter 0.17.0", ] [[package]] @@ -782,7 +782,7 @@ version = "0.2.0" dependencies = [ "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.16.1", + "tree-sitter 0.17.0", ] [[package]] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 960ca2da43..c5dbf80335 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.16.1" +version = "0.17.0" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" From 7565604626733cf2a0953e28239dfb5fc6bcc1ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:06:19 -0700 Subject: [PATCH 186/282] web: 0.17.1 --- lib/binding_web/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/package.json b/lib/binding_web/package.json index 463659fef6..8c61cf750c 100644 --- a/lib/binding_web/package.json +++ b/lib/binding_web/package.json @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.17.0", + "version": "0.17.1", "description": "Tree-sitter bindings for the web", "main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", From 28557ea8b4923fe72ab041d6348e1bea87e05ce1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:07:56 -0700 Subject: [PATCH 187/282] highlight: 0.3 --- Cargo.lock | 4 ++-- highlight/Cargo.toml | 2 +- 
2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39ff6a52f1..dcd9421f09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -763,14 +763,14 @@ dependencies = [ "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.17.0", - "tree-sitter-highlight 0.2.0", + "tree-sitter-highlight 0.3.0", "tree-sitter-tags 0.2.0", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "tree-sitter-highlight" -version = "0.2.0" +version = "0.3.0" dependencies = [ "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.17.0", diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index bb94fb2173..7f8fc04aa2 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-highlight" description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.2.0" +version = "0.3.0" authors = [ "Max Brunsfeld ", "Tim Clem " From 5080de496a87fc2b9609af80e3d893a04e2b868c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:08:19 -0700 Subject: [PATCH 188/282] 0.17.1 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcd9421f09..69317824e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,7 +740,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.0" +version = "0.17.1" dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index bd065b7134..bcc7b42d2f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter 
parsers" -version = "0.17.0" +version = "0.17.1" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 8ad062d898..42f75c9888 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.0", + "version": "0.17.1", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 939cdf12b95d441e49b902dd8128911c1fa65608 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 29 Sep 2020 12:34:25 -0400 Subject: [PATCH 189/282] Add --stats flag for reporting parse information. --- cli/src/main.rs | 10 ++++++++++ cli/src/parse.rs | 24 +++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 2f8c6dd544..4bce3d4335 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -64,6 +64,7 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)) @@ -234,6 +235,9 @@ fn run() -> error::Result<()> { let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; loader.find_all_languages(&config.parser_directories)?; + + let mut stats : parse::Stats = Default::default(); + for path in paths { let path = Path::new(&path); let language = @@ -249,8 +253,14 @@ fn run() -> error::Result<()> { debug, debug_graph, allow_cancellation, + &mut stats, )?; } + + if matches.is_present("stat") { + println!("{}", stats) + } + if has_error { return Error::err(String::new()); } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 13bac0f371..53ad859ee6 100644 --- 
a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -4,7 +4,7 @@ use std::io::{self, Write}; use std::path::Path; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Instant; -use std::{fs, thread, usize}; +use std::{fmt, fs, thread, usize}; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] @@ -14,6 +14,22 @@ pub struct Edit { pub inserted_text: Vec, } +#[derive(Debug, Default)] +pub struct Stats { + successful_parses : usize, + total_parses : usize, +} + +impl fmt::Display for Stats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + } +} + pub fn parse_file_at_path( language: Language, path: &Path, @@ -25,6 +41,7 @@ pub fn parse_file_at_path( debug: bool, debug_graph: bool, allow_cancellation: bool, + stats: &mut Stats, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -161,6 +178,11 @@ pub fn parse_file_at_path( } } + stats.total_parses += 1; + if first_error.is_none() { + stats.successful_parses += 1; + } + if first_error.is_some() || print_time { write!( &mut stdout, From 9f9f2a52b7276bed9a72d3d328f5ce3abe8da46a Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 29 Sep 2020 12:59:05 -0400 Subject: [PATCH 190/282] Docs. --- docs/section-3-creating-parsers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 694f8daeae..779991908c 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -184,10 +184,10 @@ You can run your parser on an arbitrary file using `tree-sitter parse`. 
This wil (int_literal [1, 9] - [1, 10])))))) ``` -You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: +You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. Additionally, the `--stat` flag prints out aggregated parse success/failure information for all processed files. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: ```sh -tree-sitter parse 'examples/**/*.go' --quiet +tree-sitter parse 'examples/**/*.go' --quiet --stat ``` ### Command: `highlight` From 16bd061b33aecdc1d40ead8c8fa6d3d4cb6c1bff Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 29 Sep 2020 15:43:30 -0400 Subject: [PATCH 191/282] Have the caller track stats here. 
--- cli/src/main.rs | 16 +++++++++++++--- cli/src/parse.rs | 10 ++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 4bce3d4335..afbe39e207 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -236,13 +236,15 @@ fn run() -> error::Result<()> { let mut has_error = false; loader.find_all_languages(&config.parser_directories)?; + let should_track_stats = matches.is_present("stat"); let mut stats : parse::Stats = Default::default(); for path in paths { let path = Path::new(&path); let language = select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?; - has_error |= parse::parse_file_at_path( + + let this_file_errored = parse::parse_file_at_path( language, path, &edits, @@ -253,11 +255,19 @@ fn run() -> error::Result<()> { debug, debug_graph, allow_cancellation, - &mut stats, )?; + + if should_track_stats { + stats.total_parses += 1; + if !this_file_errored { + stats.successful_parses += 1; + } + } + + has_error |= this_file_errored; } - if matches.is_present("stat") { + if should_track_stats { println!("{}", stats) } diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 53ad859ee6..568b2c527e 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -16,8 +16,8 @@ pub struct Edit { #[derive(Debug, Default)] pub struct Stats { - successful_parses : usize, - total_parses : usize, + pub successful_parses : usize, + pub total_parses : usize, } impl fmt::Display for Stats { @@ -41,7 +41,6 @@ pub fn parse_file_at_path( debug: bool, debug_graph: bool, allow_cancellation: bool, - stats: &mut Stats, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -178,11 +177,6 @@ pub fn parse_file_at_path( } } - stats.total_parses += 1; - if first_error.is_none() { - stats.successful_parses += 1; - } - if first_error.is_some() || print_time { write!( &mut stdout, From 33435f43c0c641b2199362ccb1afc73dd0947823 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 30 Sep 2020 09:28:58 
-0400 Subject: [PATCH 192/282] Take Max's suggestions. --- cli/src/main.rs | 2 +- cli/src/parse.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index afbe39e207..620f329f4e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -237,7 +237,7 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; let should_track_stats = matches.is_present("stat"); - let mut stats : parse::Stats = Default::default(); + let mut stats = parse::Stats::default(); for path in paths { let path = Path::new(&path); diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 568b2c527e..499bef1f33 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -16,8 +16,8 @@ pub struct Edit { #[derive(Debug, Default)] pub struct Stats { - pub successful_parses : usize, - pub total_parses : usize, + pub successful_parses: usize, + pub total_parses: usize, } impl fmt::Display for Stats { From 470733b3238ed692de62e9efbf6952ea8cbe82f3 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 30 Sep 2020 15:49:42 -0400 Subject: [PATCH 193/282] Fix crash when nonexistent files were passed to `parse`. We were unwrapping the result of counting the characters in the vector returned by collect_files(), which, if that vector is empty, returns None. The most correct behavior is to halt if a nonexistent filename was provided or a glob failed. --- cli/src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cli/src/main.rs b/cli/src/main.rs index 620f329f4e..0b470c743d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -426,6 +426,10 @@ fn collect_paths<'a>( } } + if result.is_empty() { + Error::err("No files were found at or matched by the provided pathname/glob".to_string())?; + } + return Ok(result); } From 683a2da055ca223098298fae27b729dbcf13f69e Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 30 Sep 2020 16:19:34 -0400 Subject: [PATCH 194/282] Fix crash when extras function doesn't return an array. 
Fixes #745, which failed due to attempting to call `map` on a non-array. This bails out at the same spot, but with a more illuminating error message. --- cli/src/generate/dsl.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 55594871e8..62fb1d70e8 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -292,7 +292,12 @@ function grammar(baseGrammar, options) { extras = options.extras .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); + + if (!Array.isArray(extras)) { + throw new Error("Grammar's 'extras' function must return an array.") + } + + extras = extras.map(normalize); } let word = baseGrammar.word; From 08356f79c778eaa8a0ea52165f7fb94a39c8c493 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 24 Sep 2020 15:22:52 -0700 Subject: [PATCH 195/282] Bump sibling cargo deps --- Cargo.lock | 4 ++-- cli/Cargo.toml | 4 ++-- lib/Cargo.toml | 4 +++- tags/Cargo.toml | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69317824e1..a85e9dada1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -764,7 +764,7 @@ dependencies = [ "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.17.0", "tree-sitter-highlight 0.3.0", - "tree-sitter-tags 0.2.0", + "tree-sitter-tags 0.3.0", "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -778,7 +778,7 @@ dependencies = [ [[package]] name = "tree-sitter-tags" -version = "0.2.0" +version = "0.3.0" dependencies = [ "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index bcc7b42d2f..c3d183e1ef 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -37,11 +37,11 @@ tiny_http = "0.6" webbrowser = "0.5.1" [dependencies.tree-sitter] -version = ">= 0.3.7" +version = ">= 0.17.0" path = 
"../lib" [dependencies.tree-sitter-highlight] -version = ">= 0.1.0" +version = ">= 0.3.0" path = "../highlight" [dependencies.tree-sitter-tags] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c5dbf80335..2d13278896 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -15,7 +15,9 @@ include = [ "/binding_rust/*", "/Cargo.toml", "/include/*", - "/src/*", + "/src/*.h", + "/src/*.c", + "/src/unicode/*", ] [dependencies] diff --git a/tags/Cargo.toml b/tags/Cargo.toml index 531b54b4ce..db73bb723d 100644 --- a/tags/Cargo.toml +++ b/tags/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-tags" description = "Library for extracting tag information" -version = "0.2.0" +version = "0.3.0" authors = [ "Max Brunsfeld ", "Patrick Thomson " @@ -21,5 +21,5 @@ regex = "1" memchr = "2.3" [dependencies.tree-sitter] -version = ">= 0.3.7" +version = ">= 0.17.0" path = "../lib" From 2a3c2ad6b98d9c04ac7941ab59cd13aeb3141afe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 30 Sep 2020 21:19:22 -0700 Subject: [PATCH 196/282] Implement Send for QueryCursor --- lib/binding_rust/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 97d10d13f8..47b8d19de5 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1826,5 +1826,6 @@ unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} unsafe impl Send for Tree {} +unsafe impl Send for QueryCursor {} unsafe impl Sync for Language {} unsafe impl Sync for Query {} From d1c95193c1228537fcf5f7f47f76ae631edd541b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 5 Oct 2020 12:07:16 -0700 Subject: [PATCH 197/282] query: Fix invalid use of slice::from_raw_parts --- cli/src/tests/query_test.rs | 30 ++++++++++++++++++++++++++++++ lib/binding_rust/lib.rs | 22 ++++++++++++++++++++-- lib/src/query.c | 4 ++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs 
index efdaf78059..323a13fcd0 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1691,6 +1691,36 @@ fn test_query_matches_with_multiple_captures_on_a_node() { }); } +#[test] +fn test_query_matches_with_no_captures() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (identifier) + (string) @s + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = 'hi'; + b = 'bye'; + ", + &[ + (0, vec![]), + (1, vec![("s", "'hi'")]), + (0, vec![]), + (1, vec![("s", "'bye'")]), + ], + ); + }); +} + #[test] fn test_query_captures_basic() { allocations::record(|| { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 47b8d19de5..372d937f75 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -138,7 +138,7 @@ pub struct QueryCaptures<'a, T: AsRef<[u8]>> { } /// A particular `Node` that has been captured with a particular name within a `Query`. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[repr(C)] pub struct QueryCapture<'a> { pub node: Node<'a>, @@ -1272,7 +1272,11 @@ impl Query { let mut length = 0u32; let raw_predicates = ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); + if length > 0 { slice::from_raw_parts(raw_predicates, length as usize) + } else { + &[] + } }; let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; @@ -1649,11 +1653,15 @@ impl<'a> QueryMatch<'a> { cursor, id: m.id, pattern_index: m.pattern_index as usize, - captures: unsafe { + captures: if m.capture_count > 0 { + unsafe { slice::from_raw_parts( m.captures as *const QueryCapture<'a>, m.capture_count as usize, ) + } + } else { + &[] }, } } @@ -1729,6 +1737,16 @@ impl<'a, T: AsRef<[u8]>> Iterator for QueryCaptures<'a, T> { } } +impl<'a> fmt::Debug for QueryMatch<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "QueryMatch {{ id: {}, pattern_index: {}, captures: {:?} 
}}", + self.id, self.pattern_index, self.captures + ) + } +} + impl PartialEq for Query { fn eq(&self, other: &Self) -> bool { self.ptr == other.ptr diff --git a/lib/src/query.c b/lib/src/query.c index b6e4895174..ce0e4cdfaf 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1710,6 +1710,8 @@ static TSQueryError ts_query__parse_pattern( stream_reset(stream, node_name); return TSQueryErrorNodeType; } + + stream_skip_whitespace(stream); } // Parse the child patterns @@ -2518,6 +2520,7 @@ static inline bool ts_query_cursor__advance( } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { + LOG("halt at root"); self->halted = true; } @@ -2582,6 +2585,7 @@ static inline bool ts_query_cursor__advance( self->end_byte <= ts_node_start_byte(node) || point_lte(self->end_point, ts_node_start_point(node)) ) { + LOG("halt at end of range"); self->halted = true; continue; } From 857a9ed07b983e2e6cff6dc851fcf9b37aec8e5a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 8 Oct 2020 12:34:08 -0700 Subject: [PATCH 198/282] query: Handle captured wildcard nodes at the root of patterns --- cli/src/tests/query_test.rs | 87 ++++++++++++++++++++++++++ lib/binding_rust/lib.rs | 10 +-- lib/src/query.c | 121 +++++++++++++++++++++++++++--------- lib/src/tree_cursor.c | 27 ++++++++ lib/src/tree_cursor.h | 2 + 5 files changed, 212 insertions(+), 35 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 323a13fcd0..1f7ddaffd6 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1691,6 +1691,93 @@ fn test_query_matches_with_multiple_captures_on_a_node() { }); } +#[test] +fn test_query_matches_with_captured_wildcard_at_root() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + " + ; captured wildcard at the root + (_ [ + (except_clause (block) @block) + (finally_clause (block) @block) + ]) @stmt + + [ + (while_statement (block) @block) + 
(if_statement (block) @block) + + ; captured wildcard at the root within an alternation + (_ [ + (else_clause (block) @block) + (elif_clause (block) @block) + ]) + + (try_statement (block) @block) + (for_statement (block) @block) + ] @stmt + ", + ) + .unwrap(); + + let source = " + for i in j: + while True: + if a: + print b + elif c: + print d + else: + try: + print f + except: + print g + finally: + print h + else: + print i + " + .trim(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let match_capture_names_and_rows = cursor + .matches(&query, tree.root_node(), to_callback(source)) + .map(|m| { + m.captures + .iter() + .map(|c| { + ( + query.capture_names()[c.index as usize].as_str(), + c.node.kind(), + c.node.start_position().row, + ) + }) + .collect::>() + }) + .collect::>(); + + assert_eq!( + match_capture_names_and_rows, + &[ + vec![("stmt", "for_statement", 0), ("block", "block", 1)], + vec![("stmt", "while_statement", 1), ("block", "block", 2)], + vec![("stmt", "if_statement", 2), ("block", "block", 3)], + vec![("stmt", "if_statement", 2), ("block", "block", 5)], + vec![("stmt", "if_statement", 2), ("block", "block", 7)], + vec![("stmt", "try_statement", 7), ("block", "block", 8)], + vec![("stmt", "try_statement", 7), ("block", "block", 10)], + vec![("stmt", "try_statement", 7), ("block", "block", 12)], + vec![("stmt", "while_statement", 1), ("block", "block", 14)], + ] + ) + }); +} + #[test] fn test_query_matches_with_no_captures() { allocations::record(|| { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 372d937f75..b33beded50 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1273,7 +1273,7 @@ impl Query { let raw_predicates = ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); if length > 0 { - slice::from_raw_parts(raw_predicates, length as usize) + 
slice::from_raw_parts(raw_predicates, length as usize) } else { &[] } @@ -1655,10 +1655,10 @@ impl<'a> QueryMatch<'a> { pattern_index: m.pattern_index as usize, captures: if m.capture_count > 0 { unsafe { - slice::from_raw_parts( - m.captures as *const QueryCapture<'a>, - m.capture_count as usize, - ) + slice::from_raw_parts( + m.captures as *const QueryCapture<'a>, + m.capture_count as usize, + ) } } else { &[] diff --git a/lib/src/query.c b/lib/src/query.c index ce0e4cdfaf..133762b908 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -138,6 +138,7 @@ typedef struct { bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; bool dead: 1; + bool needs_parent: 1; } QueryState; typedef Array(TSQueryCapture) CaptureList; @@ -2011,20 +2012,24 @@ TSQuery *ts_query_new( return NULL; } - // If a pattern has a wildcard at its root, optimize the matching process - // by skipping matching the wildcard. - if ( - self->steps.contents[start_step_index].symbol == WILDCARD_SYMBOL - ) { - QueryStep *second_step = &self->steps.contents[start_step_index + 1]; - if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth != PATTERN_DONE_MARKER) { - start_step_index += 1; - } - } - // Maintain a map that can look up patterns for a given root symbol. + uint16_t wildcard_root_alternative_index = NONE; for (;;) { QueryStep *step = &self->steps.contents[start_step_index]; + + // If a pattern has a wildcard at its root, but it has a non-wildcard child, + // then optimize the matching process by skipping matching the wildcard. + // Later, during the matching process, the query cursor will check that + // there is a parent node, and capture it if necessary. 
+ if (step->symbol == WILDCARD_SYMBOL && step->depth == 0) { + QueryStep *second_step = &self->steps.contents[start_step_index + 1]; + if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { + wildcard_root_alternative_index = step->alternative_index; + start_step_index += 1; + step = second_step; + } + } + ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; @@ -2035,6 +2040,9 @@ TSQuery *ts_query_new( if (step->alternative_index != NONE) { start_step_index = step->alternative_index; step->alternative_index = NONE; + } else if (wildcard_root_alternative_index != NONE) { + start_step_index = wildcard_root_alternative_index; + wildcard_root_alternative_index = NONE; } else { break; } @@ -2386,8 +2394,8 @@ static void ts_query_cursor__add_state( if (prev_state->start_depth == start_depth) { if (prev_state->pattern_index < pattern->pattern_index) break; if (prev_state->pattern_index == pattern->pattern_index) { - // Avoid unnecessarily inserting an unnecessary duplicate state, - // which would be immediately pruned by the longest-match criteria. + // Avoid inserting an unnecessary duplicate state, which would be + // immediately pruned by the longest-match criteria. 
if (prev_state->step_index == pattern->step_index) return; } } @@ -2407,6 +2415,7 @@ static void ts_query_cursor__add_state( .consumed_capture_count = 0, .seeking_immediate_match = true, .has_in_progress_alternatives = false, + .needs_parent = step->depth == 1, .dead = false, })); } @@ -2460,6 +2469,33 @@ static CaptureList *ts_query_cursor__prepare_to_capture( return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); } +static void ts_query_cursor__capture( + TSQueryCursor *self, + QueryState *state, + QueryStep *step, + TSNode node +) { + if (state->dead) return; + CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); + if (!capture_list) { + state->dead = true; + return; + } + + for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { + uint16_t capture_id = step->capture_ids[j]; + if (step->capture_ids[j] == NONE) break; + array_push(capture_list, ((TSQueryCapture) { node, capture_id })); + LOG( + " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", + ts_node_type(node), + state->pattern_index, + capture_id, + capture_list->size + ); + } +} + // Duplicate the given state and insert the newly-created state immediately after // the given state in the `states` array. Ensures that the given state reference is // still valid, even if the states array is reallocated. @@ -2730,26 +2766,45 @@ static inline bool ts_query_cursor__advance( } } + // If this pattern started with a wildcard, such that the pattern map + // actually points to the *second* step of the pattern, then check + // that the node has a parent, and capture the parent node if necessary. 
+ if (state->needs_parent) { + TSNode parent = ts_tree_cursor_parent_node(&self->cursor); + if (ts_node_is_null(parent)) { + LOG(" missing parent node\n"); + state->dead = true; + } else { + state->needs_parent = false; + QueryStep *skipped_wildcard_step = step; + do { + skipped_wildcard_step--; + } while ( + skipped_wildcard_step->is_dead_end || + skipped_wildcard_step->is_pass_through || + skipped_wildcard_step->depth > 0 + ); + if (skipped_wildcard_step->capture_ids[0] != NONE) { + LOG(" capture wildcard parent\n"); + ts_query_cursor__capture( + self, + state, + skipped_wildcard_step, + parent + ); + } + } + } + // If the current node is captured in this pattern, add it to the capture list. if (step->capture_ids[0] != NONE) { - CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); - if (!capture_list) { - array_erase(&self->states, i); - i--; - continue; - } + ts_query_cursor__capture(self, state, step, node); + } - for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { - uint16_t capture_id = step->capture_ids[j]; - if (step->capture_ids[j] == NONE) break; - array_push(capture_list, ((TSQueryCapture) { node, capture_id })); - LOG( - " capture node. pattern:%u, capture_id:%u, capture_count:%u\n", - state->pattern_index, - capture_id, - capture_list->size - ); - } + if (state->dead) { + array_erase(&self->states, i); + i--; + continue; } // Advance this state to the next step of its pattern. @@ -2772,12 +2827,18 @@ static inline bool ts_query_cursor__advance( QueryState *state = &self->states.contents[j]; QueryStep *next_step = &self->query->steps.contents[state->step_index]; if (next_step->alternative_index != NONE) { + // A "dead-end" step exists only to add a non-sequential jump into the step sequence, + // via its alternative index. When a state reaches a dead-end step, it jumps straight + // to the step's alternative. 
if (next_step->is_dead_end) { state->step_index = next_step->alternative_index; j--; continue; } + // A "pass-through" step exists only to add a branch into the step sequence, + // via its alternative_index. When a state reaches a pass-through step, it splits + // in order to process the alternative step, and then it advances to the next step. if (next_step->is_pass_through) { state->step_index++; j--; diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 64e8b41423..f109524e87 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -364,6 +364,33 @@ void ts_tree_cursor_current_status( } } +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + for (int i = (int)self->stack.size - 2; i >= 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + bool is_visible = true; + TSSymbol alias_symbol = 0; + if (i > 0) { + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + alias_symbol = ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ); + is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); + } + if (is_visible) { + return ts_node_new( + self->tree, + entry->subtree, + entry->position, + alias_symbol + ); + } + } + return ts_node_new(NULL, NULL, length_zero(), 0); +} + TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 7c9c05d582..69647d1d33 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -26,4 +26,6 @@ void ts_tree_cursor_current_status( unsigned * ); +TSNode ts_tree_cursor_parent_node(const TSTreeCursor *); + #endif // TREE_SITTER_TREE_CURSOR_H_ From 000455ee790044c0f358c9b96de40a0acea97b1a Mon Sep 17 00:00:00 2001 From: Hansraj Das Date: Sun, 11 Oct 2020 13:02:40 +0530 Subject: [PATCH 199/282] Multiple typo fixes * This 
is a patch from neovim PR: https://github.com/neovim/neovim/pull/13063 --- lib/include/tree_sitter/api.h | 6 +++--- lib/src/lexer.c | 2 +- lib/src/parser.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index b85380d1e0..caa05f5220 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -220,8 +220,8 @@ const TSRange *ts_parser_included_ranges( * following three fields: * 1. `read`: A function to retrieve a chunk of text at a given byte offset * and (row, column) position. The function should return a pointer to the - * text and write its length to the the `bytes_read` pointer. The parser - * does not take ownership of this buffer; it just borrows it until it has + * text and write its length to the `bytes_read` pointer. The parser does + * not take ownership of this buffer; it just borrows it until it has * finished reading it. The function should write a zero value to the * `bytes_read` pointer to indicate the end of the document. * 2. `payload`: An arbitrary pointer that will be passed to each invocation @@ -765,7 +765,7 @@ void ts_query_disable_pattern(TSQuery *, uint32_t); * to start running a given query on a given syntax node. Then, there are * two options for consuming the results of the query: * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the - * the *matches* in the order that they were found. Each match contains the + * *matches* in the order that they were found. Each match contains the * index of the pattern that matched, and an array of captures. Because * multiple patterns can match the same set of nodes, one match may contain * captures that appear *before* some of the captures from a previous match. 
diff --git a/lib/src/lexer.c b/lib/src/lexer.c index a3c29544d3..08e90a8c7e 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -203,7 +203,7 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { // Is the lexer at a boundary between two disjoint included ranges of // source code? This is exposed as an API because some languages' external -// scanners need to perform custom actions at these bounaries. +// scanners need to perform custom actions at these boundaries. static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { const Lexer *self = (const Lexer *)_self; if (self->current_included_range_index < self->included_range_count) { diff --git a/lib/src/parser.c b/lib/src/parser.c index 79cad797a0..b88f84e42e 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1221,7 +1221,7 @@ static void ts_parser__recover( } } - // In the process of attemping to recover, some stack versions may have been created + // In the process of attempting to recover, some stack versions may have been created // and subsequently halted. Remove those versions. for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { if (!ts_stack_is_active(self->stack, i)) { From 84433494a565d1b8307050c1dccd9923d94f14c8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 12 Oct 2020 09:45:50 -0700 Subject: [PATCH 200/282] Fix query analysis error for rules w/ required hidden tokens Refs tree-sitter/node-tree-sitter#69 --- lib/src/query.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/src/query.c b/lib/src/query.c index 133762b908..ae476c2ab5 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -1167,8 +1167,6 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_insert_sorted_with(&deeper_states, analysis_state__compare, next_state); continue; } - } else { - continue; } // Pop from the stack when this state reached the end of its current syntax node. 
From 87fd2f5ca3c2198c6b11ece3e060aab0e81406ac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 12 Oct 2020 11:53:16 -0700 Subject: [PATCH 201/282] rust: Detect debug builds using PROFILE env var in build script Fixes #757 --- lib/binding_rust/build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs index caf5fa8e74..0ec7a4ad6e 100644 --- a/lib/binding_rust/build.rs +++ b/lib/binding_rust/build.rs @@ -21,8 +21,8 @@ fn main() { let mut config = cc::Build::new(); - println!("cargo:rerun-if-env-changed=DEBUG"); - if env::var("DEBUG").map(|s| s == "true").unwrap_or(false) { + println!("cargo:rerun-if-env-changed=PROFILE"); + if env::var("PROFILE").map_or(false, |s| s == "debug") { config.define("TREE_SITTER_TEST", ""); } From 0a460333912e0f72ad3b4b48c20e22e74c539c88 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 14 Oct 2020 11:35:41 -0700 Subject: [PATCH 202/282] Remove duplication of LossyUtf8 helper --- cli/src/main.rs | 4 ++- cli/src/tests/highlight_test.rs | 2 +- highlight/src/lib.rs | 6 ++-- highlight/src/util.rs | 53 ------------------------------- lib/binding_rust/lib.rs | 2 ++ lib/binding_rust/util.rs | 53 +++++++++++++++++++++++++++++++ tags/src/lib.rs | 56 +-------------------------------- 7 files changed, 63 insertions(+), 113 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 0b470c743d..2e55c2fba4 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -427,7 +427,9 @@ fn collect_paths<'a>( } if result.is_empty() { - Error::err("No files were found at or matched by the provided pathname/glob".to_string())?; + Error::err( + "No files were found at or matched by the provided pathname/glob".to_string(), + )?; } return Ok(result); diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 1f7106dd19..6b09d64c50 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -588,7 +588,7 @@ fn 
test_highlighting_via_c_api() { #[test] fn test_decode_utf8_lossy() { - use tree_sitter_highlight::util::LossyUtf8; + use tree_sitter::LossyUtf8; let parts = LossyUtf8::new(b"hi").collect::>(); assert_eq!(parts, vec!["hi"]); diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index e4aebbfb2b..0f48847beb 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -5,8 +5,8 @@ pub use c_lib as c; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ - Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, - Range, Tree, + Language, LossyUtf8, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, + QueryMatch, Range, Tree, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -991,7 +991,7 @@ impl HtmlRenderer { F: Fn(Highlight) -> &'a [u8], { let mut last_char_was_cr = false; - for c in util::LossyUtf8::new(src).flat_map(|p| p.bytes()) { + for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) { // Don't render carriage return characters, but allow lone carriage returns (not // followed by line feeds) to be styled via the attribute callback. 
if c == b'\r' { diff --git a/highlight/src/util.rs b/highlight/src/util.rs index 6c325a6cf9..29adb13b11 100644 --- a/highlight/src/util.rs +++ b/highlight/src/util.rs @@ -1,56 +1,3 @@ -use std::str; - -pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - -impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { - LossyUtf8 { - bytes, - in_replacement: false, - } - } -} - -impl<'a> Iterator for LossyUtf8<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - if self.bytes.is_empty() { - return None; - } - if self.in_replacement { - self.in_replacement = false; - return Some("\u{fffd}"); - } - match str::from_utf8(self.bytes) { - Ok(valid) => { - self.bytes = &[]; - Some(valid) - } - Err(error) => { - if let Some(error_len) = error.error_len() { - let error_start = error.valid_up_to(); - if error_start > 0 { - let result = - unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; - self.bytes = &self.bytes[(error_start + error_len)..]; - self.in_replacement = true; - Some(result) - } else { - self.bytes = &self.bytes[error_len..]; - Some("\u{fffd}") - } - } else { - None - } - } - } - } -} - pub fn html_escape(c: u8) -> Option<&'static [u8]> { match c as char { '>' => Some(b">"), diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index b33beded50..e64833635f 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -12,6 +12,8 @@ use std::ptr::NonNull; use std::sync::atomic::AtomicUsize; use std::{char, fmt, hash, iter, ptr, slice, str, u16}; +pub use util::LossyUtf8; + /// The latest ABI version that is supported by the current version of the /// library. /// diff --git a/lib/binding_rust/util.rs b/lib/binding_rust/util.rs index 1a4ac1b77f..e2660c1451 100644 --- a/lib/binding_rust/util.rs +++ b/lib/binding_rust/util.rs @@ -72,6 +72,59 @@ pub struct CBufferIter { i: usize, } +// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` +// is ever stabilized. 
+pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + +impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match std::str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + impl CBufferIter { pub unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 576b04f8d7..12db90cbf2 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -8,7 +8,7 @@ use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{char, fmt, mem, str}; use tree_sitter::{ - Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, + Language, LossyUtf8, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, }; const MAX_LINE_LEN: usize = 180; @@ -107,11 +107,6 @@ struct LineInfo { line_range: Range, } -struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; @@ -588,55 +583,6 @@ impl From for Error { } } -// TODO: Remove this struct at at some point. 
If `core::str::lossy::Utf8Lossy` -// is ever stabilized, we should use that. Otherwise, this struct could be moved -// into some module that's shared between `tree-sitter-tags` and `tree-sitter-highlight`. -impl<'a> LossyUtf8<'a> { - fn new(bytes: &'a [u8]) -> Self { - LossyUtf8 { - bytes, - in_replacement: false, - } - } -} - -impl<'a> Iterator for LossyUtf8<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - if self.bytes.is_empty() { - return None; - } - if self.in_replacement { - self.in_replacement = false; - return Some("\u{fffd}"); - } - match str::from_utf8(self.bytes) { - Ok(valid) => { - self.bytes = &[]; - Some(valid) - } - Err(error) => { - if let Some(error_len) = error.error_len() { - let error_start = error.valid_up_to(); - if error_start > 0 { - let result = - unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; - self.bytes = &self.bytes[(error_start + error_len)..]; - self.in_replacement = true; - Some(result) - } else { - self.bytes = &self.bytes[error_len..]; - Some("\u{fffd}") - } - } else { - None - } - } - } - } -} - fn line_range( text: &[u8], start_byte: usize, From 7aca28833007885d02e5b41d6a7affc17b92fa10 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 14 Oct 2020 11:59:56 -0700 Subject: [PATCH 203/282] Move LossyUtf8 struct out of util module --- lib/binding_rust/lib.rs | 55 ++++++++++++++++++++++++++++++++++++++-- lib/binding_rust/util.rs | 53 -------------------------------------- 2 files changed, 53 insertions(+), 55 deletions(-) diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index e64833635f..0b0097f93e 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -12,8 +12,6 @@ use std::ptr::NonNull; use std::sync::atomic::AtomicUsize; use std::{char, fmt, hash, iter, ptr, slice, str, u16}; -pub use util::LossyUtf8; - /// The latest ABI version that is supported by the current version of the /// library. 
/// @@ -184,6 +182,13 @@ enum TextPredicate { CaptureMatchString(u32, regex::bytes::Regex, bool), } +// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` +// is ever stabilized. +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, +} + impl Language { /// Get the ABI version number that indicates which version of the Tree-sitter CLI /// that was used to generate this `Language`. @@ -1832,6 +1837,52 @@ impl<'a> Into for &'a InputEdit { } } +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + +impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match std::str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, diff --git a/lib/binding_rust/util.rs b/lib/binding_rust/util.rs index e2660c1451..1a4ac1b77f 100644 --- a/lib/binding_rust/util.rs +++ b/lib/binding_rust/util.rs @@ -72,59 +72,6 @@ pub struct CBufferIter { i: usize, } -// TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` -// is ever stabilized. 
-pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - -impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { - LossyUtf8 { - bytes, - in_replacement: false, - } - } -} - -impl<'a> Iterator for LossyUtf8<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - if self.bytes.is_empty() { - return None; - } - if self.in_replacement { - self.in_replacement = false; - return Some("\u{fffd}"); - } - match std::str::from_utf8(self.bytes) { - Ok(valid) => { - self.bytes = &[]; - Some(valid) - } - Err(error) => { - if let Some(error_len) = error.error_len() { - let error_start = error.valid_up_to(); - if error_start > 0 { - let result = - unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; - self.bytes = &self.bytes[(error_start + error_len)..]; - self.in_replacement = true; - Some(result) - } else { - self.bytes = &self.bytes[error_len..]; - Some("\u{fffd}") - } - } else { - None - } - } - } - } -} - impl CBufferIter { pub unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 } From 8bb8e9b8b3456bc77643ecc3538c6b5205b4d8db Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Oct 2020 07:20:12 -0700 Subject: [PATCH 204/282] Initialize TSLanguage fields in order of their declaration This makes parser.c valid under the C++20 standard --- cli/src/generate/render.rs | 81 +++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index cf3109c8a9..f7f788d07e 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1268,34 +1268,12 @@ impl Generator { add_line!(self, ".symbol_count = SYMBOL_COUNT,"); add_line!(self, ".alias_count = ALIAS_COUNT,"); add_line!(self, ".token_count = TOKEN_COUNT,"); - add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); - - if self.next_abi { - add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); - add_line!(self, 
".state_count = STATE_COUNT,"); - } - + add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); + add_line!(self, ".symbol_names = ts_symbol_names,"); add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); - add_line!( - self, - ".parse_table = (const unsigned short *)ts_parse_table," - ); - - if self.large_state_count < self.parse_table.states.len() { - add_line!( - self, - ".small_parse_table = (const uint16_t *)ts_small_parse_table," - ); - add_line!( - self, - ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," - ); - } - + add_line!(self, ".parse_table = (const uint16_t *)ts_parse_table,"); add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); - add_line!(self, ".symbol_names = ts_symbol_names,"); - add_line!(self, ".public_symbol_map = ts_symbol_map,"); if !self.parse_table.production_infos.is_empty() { add_line!( @@ -1303,27 +1281,12 @@ impl Generator { ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); } - - add_line!(self, ".field_count = FIELD_COUNT,"); - - if !self.field_names.is_empty() { - add_line!(self, ".field_names = ts_field_names,"); - add_line!( - self, - ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," - ); - add_line!( - self, - ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," - ); - } - add_line!( self, ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," ); - add_line!(self, ".lex_fn = ts_lex,"); + add_line!(self, ".lex_fn = ts_lex,"); if let Some(keyword_capture_token) = self.keyword_capture_token { add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); add_line!( @@ -1333,8 +1296,6 @@ impl Generator { ); } - add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); - if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, ".external_scanner = {{"); indent!(self); @@ -1348,8 +1309,40 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - dedent!(self); + add_line!(self, 
".field_count = FIELD_COUNT,"); + if !self.field_names.is_empty() { + add_line!( + self, + ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," + ); + add_line!( + self, + ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," + ); + add_line!(self, ".field_names = ts_field_names,"); + } + + add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); + if self.large_state_count < self.parse_table.states.len() { + add_line!( + self, + ".small_parse_table = (const uint16_t *)ts_small_parse_table," + ); + add_line!( + self, + ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," + ); + } + + add_line!(self, ".public_symbol_map = ts_symbol_map,"); + + if self.next_abi { + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); + add_line!(self, ".state_count = STATE_COUNT,"); + } + + dedent!(self); add_line!(self, "}};"); add_line!(self, "return &language;"); dedent!(self); From 50ff4376b4a20fd8f5267d1af20116e874b00823 Mon Sep 17 00:00:00 2001 From: ikrima Date: Thu, 15 Oct 2020 20:21:33 -0700 Subject: [PATCH 205/282] fix: ts_subtree_string not using ts_malloc --- lib/src/subtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 24dc06b203..a72d2ec2fd 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -902,7 +902,7 @@ char *ts_subtree_string( language, include_all, 0, false, ROOT_FIELD ) + 1; - char *result = malloc(size * sizeof(char)); + char *result = ts_malloc(size * sizeof(char)); ts_subtree__write_to_string( self, result, size, language, include_all, From 3eee1d5376577c86f8f730b012b1bdfecf8c11d2 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Sun, 27 Sep 2020 16:49:17 +0800 Subject: [PATCH 206/282] Fix compilation with TinyCC --- lib/src/atomic.h | 18 +++++++++++++++++- lib/src/bits.h | 15 ++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 7bd0e850a9..16573242bb 100644 --- 
a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -3,7 +3,23 @@ #include -#ifdef _WIN32 +#ifdef __TINYC__ + +static inline size_t atomic_load(const volatile size_t *p) { + return *p; +} + +static inline uint32_t atomic_inc(volatile uint32_t *p) { + *p += 1; + return *p; +} + +static inline uint32_t atomic_dec(volatile uint32_t *p) { + *p-= 1; + return *p; +} + +#elif defined(_WIN32) #include diff --git a/lib/src/bits.h b/lib/src/bits.h index ce7a715567..ca8caf30aa 100644 --- a/lib/src/bits.h +++ b/lib/src/bits.h @@ -7,7 +7,20 @@ static inline uint32_t bitmask_for_index(uint16_t id) { return (1u << (31 - id)); } -#if defined _WIN32 && !defined __GNUC__ +#ifdef __TINYC__ + +// Algorithm taken from the Hacker's Delight book +// See also https://graphics.stanford.edu/~seander/bithacks.html +static inline uint32_t count_leading_zeros(uint32_t x) { + int count = 0; + if (x == 0) return 32; + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + count = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; + return count; +} + +#elif defined _WIN32 && !defined __GNUC__ #include From 6f13d6bbba9e9cd13a75919e1bcab686709e6d5f Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 11:22:56 -0400 Subject: [PATCH 207/282] Define Python fixture --- test/fixtures/queries/python.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 test/fixtures/queries/python.py diff --git a/test/fixtures/queries/python.py b/test/fixtures/queries/python.py new file mode 100644 index 0000000000..c90830a717 --- /dev/null +++ b/test/fixtures/queries/python.py @@ -0,0 +1,7 @@ +def foo(): pass +# declaration: function: 0, 0 + +def bar(): +# declaration: function, 3, 0 + foo() +# reference: call, 5, 4 From 91d5d59d85bf24a32840ea54404cfe80ef76cd2c Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 12:37:24 -0400 Subject: [PATCH 208/282] Introduce query/assert and call it in query.rs. 
--- cli/src/main.rs | 13 +++++++++++-- cli/src/query.rs | 26 +++++++++++++++++++++----- cli/src/query/assert.rs | 23 +++++++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 cli/src/query/assert.rs diff --git a/cli/src/main.rs b/cli/src/main.rs index 2e55c2fba4..7594ce276c 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -95,7 +95,8 @@ fn run() -> error::Result<()> { .takes_value(true), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) - .arg(Arg::with_name("captures").long("captures").short("c")), + .arg(Arg::with_name("captures").long("captures").short("c")) + .arg(Arg::with_name("test").long("test")), ) .subcommand( SubCommand::with_name("tags") @@ -289,7 +290,15 @@ fn run() -> error::Result<()> { let r: Vec<&str> = br.split(":").collect(); (r[0].parse().unwrap(), r[1].parse().unwrap()) }); - query::query_files_at_paths(language, paths, query_path, ordered_captures, range)?; + let should_test = matches.is_present("test"); + query::query_files_at_paths( + language, + paths, + query_path, + ordered_captures, + range, + should_test, + )?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; diff --git a/cli/src/query.rs b/cli/src/query.rs index e71e62540c..d2aefc7ffd 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -4,12 +4,17 @@ use std::io::{self, Write}; use std::path::Path; use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; +mod assert; + +use assert::CaptureInfo; + pub fn query_files_at_paths( language: Language, paths: Vec, query_path: &Path, ordered_captures: bool, range: Option<(usize, usize)>, + should_test: bool, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ -29,6 +34,8 @@ pub fn query_files_at_paths( parser.set_language(language).map_err(|e| e.to_string())?; for path in paths { + let mut 
results = Vec::new(); + writeln!(&mut stdout, "{}", path)?; let source_code = fs::read(&path).map_err(Error::wrap(|| { @@ -42,14 +49,18 @@ pub fn query_files_at_paths( query_cursor.captures(&query, tree.root_node(), text_callback) { let capture = mat.captures[capture_index]; + let capture_name = &query.capture_names()[capture.index as usize]; writeln!( &mut stdout, " pattern: {}, capture: {}, row: {}, text: {:?}", mat.pattern_index, - &query.capture_names()[capture.index as usize], + capture_name, capture.node.start_position().row, capture.node.utf8_text(&source_code).unwrap_or("") )?; + results.push(CaptureInfo { + name: capture_name.to_string(), + }); } } else { for m in query_cursor.matches(&query, tree.root_node(), text_callback) { @@ -57,11 +68,12 @@ pub fn query_files_at_paths( for capture in m.captures { let start = capture.node.start_position(); let end = capture.node.end_position(); + let capture_name = &query.capture_names()[capture.index as usize]; if end.row == start.row { writeln!( &mut stdout, " capture: {}, start: {}, text: {:?}", - &query.capture_names()[capture.index as usize], + capture_name, start, capture.node.utf8_text(&source_code).unwrap_or("") )?; @@ -69,14 +81,18 @@ pub fn query_files_at_paths( writeln!( &mut stdout, " capture: {}, start: {}, end: {}", - &query.capture_names()[capture.index as usize], - start, - end, + capture_name, start, end, )?; } + results.push(CaptureInfo { + name: capture_name.to_string(), + }); } } } + if should_test { + assert::assert_expected_captures(results, path); + } } Ok(()) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs new file mode 100644 index 0000000000..5fb7e1d63d --- /dev/null +++ b/cli/src/query/assert.rs @@ -0,0 +1,23 @@ +use lazy_static::lazy_static; +use regex::Regex; +use tree_sitter::Point; + +// TODO: It would be cooler to do this with a comments query rather than with a regex +// directly. +lazy_static! 
{ + static ref METADATA_PAIR_REGEX: Regex = Regex::new(r#"(\w+): ([^\s,]+)"#).unwrap(); + static ref NUMBER_REGEX: Regex = Regex::new(r#"\d+"#).unwrap(); +} + +pub struct CaptureInfo { + pub name: String, +} + +#[derive(Debug, Eq, PartialEq)] +struct Assertion { + position: Point, + line_numbers: Vec, + capture_type: String, +} + +pub fn assert_expected_captures(_captures: Vec, _path: String) {} From 947528f01903930dd8e7201fff6e32d00ea79541 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 12:49:41 -0400 Subject: [PATCH 209/282] use our Result type here --- cli/src/query.rs | 2 +- cli/src/query/assert.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cli/src/query.rs b/cli/src/query.rs index d2aefc7ffd..56b8674050 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -91,7 +91,7 @@ pub fn query_files_at_paths( } } if should_test { - assert::assert_expected_captures(results, path); + assert::assert_expected_captures(results, path)? } } diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 5fb7e1d63d..d4140f238d 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,3 +1,4 @@ +use super::super::error::Result; use lazy_static::lazy_static; use regex::Regex; use tree_sitter::Point; @@ -20,4 +21,6 @@ struct Assertion { capture_type: String, } -pub fn assert_expected_captures(_captures: Vec, _path: String) {} +pub fn assert_expected_captures(_captures: Vec, _path: String) -> Result<()> { + Ok(()) +} From c691df5ae22ff1ed3e20685186fcafc5adb163bf Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 12:56:11 -0400 Subject: [PATCH 210/282] reading in the source correctly --- cli/src/query/assert.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index d4140f238d..35a7f4b7ae 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,6 +1,7 @@ use super::super::error::Result; use 
lazy_static::lazy_static; use regex::Regex; +use std::fs; use tree_sitter::Point; // TODO: It would be cooler to do this with a comments query rather than with a regex @@ -21,6 +22,10 @@ struct Assertion { capture_type: String, } -pub fn assert_expected_captures(_captures: Vec, _path: String) -> Result<()> { +pub fn assert_expected_captures(_captures: Vec, path: String) -> Result<()> { + let contents = fs::read_to_string(path)?; + for m in METADATA_PAIR_REGEX.captures_iter(&contents) { + println!("pair: {:?}", m); + } Ok(()) } From 0dfe89f3538d7ed8f08d8b9b7b05e515d82989c0 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 13:32:04 -0400 Subject: [PATCH 211/282] parse assertions from regex capture --- cli/src/query/assert.rs | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 35a7f4b7ae..d5998eaf60 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -7,8 +7,7 @@ use tree_sitter::Point; // TODO: It would be cooler to do this with a comments query rather than with a regex // directly. lazy_static! 
{ - static ref METADATA_PAIR_REGEX: Regex = Regex::new(r#"(\w+): ([^\s,]+)"#).unwrap(); - static ref NUMBER_REGEX: Regex = Regex::new(r#"\d+"#).unwrap(); + static ref METADATA_REGEX: Regex = Regex::new(r#"(\w+): ([^\s,]+), (\d+), (\d+)"#).unwrap(); } pub struct CaptureInfo { @@ -18,14 +17,44 @@ pub struct CaptureInfo { #[derive(Debug, Eq, PartialEq)] struct Assertion { position: Point, - line_numbers: Vec, + capture_class: String, capture_type: String, } +impl From> for Assertion { + fn from(re: regex::Captures) -> Assertion { + Assertion { + capture_class: re.get(1).unwrap().as_str().to_string(), + capture_type: re.get(2).unwrap().as_str().to_string(), + position: Point { + row: re + .get(3) + .iter() + .flat_map(|m| m.as_str().parse::()) + .next() + .unwrap(), + column: re + .get(4) + .iter() + .flat_map(|m| m.as_str().parse::()) + .next() + .unwrap(), + }, + } + } +} + pub fn assert_expected_captures(_captures: Vec, path: String) -> Result<()> { let contents = fs::read_to_string(path)?; - for m in METADATA_PAIR_REGEX.captures_iter(&contents) { - println!("pair: {:?}", m); + + let assertions: Vec = METADATA_REGEX + .captures_iter(&contents) + .map(|c| Assertion::from(c)) + .collect(); + + for a in assertions { + println!("a: {:?}", a); } + Ok(()) } From 363a0ce4fccd59230df9063cfded5dbede15907c Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Wed, 21 Oct 2020 14:54:47 -0400 Subject: [PATCH 212/282] things are working: time to piggyback off the highlighter's parser --- cli/src/query.rs | 2 ++ cli/src/query/assert.rs | 25 +++++++++++++++++++++---- test/fixtures/queries/python.py | 4 ++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cli/src/query.rs b/cli/src/query.rs index 56b8674050..704a2c5611 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -60,6 +60,7 @@ pub fn query_files_at_paths( )?; results.push(CaptureInfo { name: capture_name.to_string(), + position: capture.node.start_position(), }); } } else { @@ -86,6 +87,7 @@ pub fn 
query_files_at_paths( } results.push(CaptureInfo { name: capture_name.to_string(), + position: capture.node.start_position(), }); } } diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index d5998eaf60..1b31c1c07f 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,6 +1,8 @@ +use super::super::error; use super::super::error::Result; use lazy_static::lazy_static; use regex::Regex; +use std::collections::hash_map::HashMap; use std::fs; use tree_sitter::Point; @@ -10,8 +12,10 @@ lazy_static! { static ref METADATA_REGEX: Regex = Regex::new(r#"(\w+): ([^\s,]+), (\d+), (\d+)"#).unwrap(); } +#[derive(Debug, Eq, PartialEq)] pub struct CaptureInfo { pub name: String, + pub position: Point, } #[derive(Debug, Eq, PartialEq)] @@ -44,7 +48,7 @@ impl From> for Assertion { } } -pub fn assert_expected_captures(_captures: Vec, path: String) -> Result<()> { +pub fn assert_expected_captures(captures: Vec, path: String) -> Result<()> { let contents = fs::read_to_string(path)?; let assertions: Vec = METADATA_REGEX @@ -52,9 +56,22 @@ pub fn assert_expected_captures(_captures: Vec, path: String) -> Re .map(|c| Assertion::from(c)) .collect(); - for a in assertions { - println!("a: {:?}", a); - } + let per_position_index: HashMap = + assertions.iter().map(|a| (a.position, a)).collect(); + for capture in &captures { + let oFound = per_position_index.get(&capture.position); + if oFound.is_none() { + continue; + } + let found = oFound.unwrap(); + let joined = format!("{}.{}", found.capture_class, found.capture_type); + if joined != capture.name && capture.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + capture.position, capture.name, joined + )))? 
+ } + } Ok(()) } diff --git a/test/fixtures/queries/python.py b/test/fixtures/queries/python.py index c90830a717..a48ed2de92 100644 --- a/test/fixtures/queries/python.py +++ b/test/fixtures/queries/python.py @@ -1,7 +1,7 @@ def foo(): pass -# declaration: function: 0, 0 +# definition: function: 0, 0 def bar(): -# declaration: function, 3, 0 +# definition: function, 3, 0 foo() # reference: call, 5, 4 From e370c5053e2134a44b9f35a5347be408b0c88135 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Fri, 23 Oct 2020 14:11:46 -0400 Subject: [PATCH 213/282] this is nicer, though --- cli/src/query/assert.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 1b31c1c07f..96162c5bed 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,5 +1,5 @@ -use super::super::error; -use super::super::error::Result; +use crate::error; +use crate::error::Result; use lazy_static::lazy_static; use regex::Regex; use std::collections::hash_map::HashMap; @@ -48,7 +48,7 @@ impl From> for Assertion { } } -pub fn assert_expected_captures(captures: Vec, path: String) -> Result<()> { +pub fn assert_expected_captures(infos: Vec, path: String) -> Result<()> { let contents = fs::read_to_string(path)?; let assertions: Vec = METADATA_REGEX @@ -59,17 +59,16 @@ pub fn assert_expected_captures(captures: Vec, path: String) -> Res let per_position_index: HashMap = assertions.iter().map(|a| (a.position, a)).collect(); - for capture in &captures { - let oFound = per_position_index.get(&capture.position); - if oFound.is_none() { + for info in &infos { + if !per_position_index.contains_key(&info.position) { continue; } - let found = oFound.unwrap(); + let found = per_position_index.get(&info.position).unwrap(); let joined = format!("{}.{}", found.capture_class, found.capture_type); - if joined != capture.name && capture.name != "name" { + if joined != info.name && info.name != "name" { 
Err(error::Error::new(format!( "Assertion failed: at {}, found {}, expected {}", - capture.position, capture.name, joined + info.position, info.name, joined )))? } } From b5d20f07b653449bf467ce2699abf67e28214108 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Fri, 23 Oct 2020 14:34:56 -0400 Subject: [PATCH 214/282] Document query anchor operator (#771) This was taken more or less directly from @maxbrunsfeld's PR comments in https://github.com/tree-sitter/tree-sitter/pull/549. :tophat: --- docs/section-2-using-parsers.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index a9f5de0295..e73dee5518 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -585,6 +585,38 @@ For example, this pattern would match any node inside a call: (call (_) @call.inner) ``` + +#### Anchor Nodes + +The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query. + +When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`: + +``` +(array . (identifier) @the-element) +``` + +Were this anchor operator elided, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier. + +Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`. + +``` +(block (_) @last-expression .) 
+``` + +Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`. + +``` +(dotted_name + (identifier) @prev-id + . + (identifier) @next-id) +``` + +Were the anchor elided, non-consecutive pairs like `a, c` and `b, d` would be matched. + +The restrictions placed on a pattern by an anchor operator ignore anonymous nodes. + #### Predicates You can also specify arbitrary metadata and conditions associed with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. From b42b873564546c5838f7acba0afa2722b0f033b4 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Fri, 23 Oct 2020 15:11:17 -0400 Subject: [PATCH 215/282] Update docs/section-2-using-parsers.md Co-authored-by: Max Brunsfeld --- docs/section-2-using-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index e73dee5518..3b2f282a98 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -613,7 +613,7 @@ Finally, an anchor _between_ two child patterns will cause the patterns to only (identifier) @next-id) ``` -Were the anchor elided, non-consecutive pairs like `a, c` and `b, d` would be matched. +Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched. The restrictions placed on a pattern by an anchor operator ignore anonymous nodes. 
From 4d8cdc2f368c127fcb439309a9b78d9a889fdf6e Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Fri, 23 Oct 2020 15:11:23 -0400 Subject: [PATCH 216/282] Update docs/section-2-using-parsers.md Co-authored-by: Max Brunsfeld --- docs/section-2-using-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 3b2f282a98..7166c77695 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -596,7 +596,7 @@ When `.` is placed before the _first_ child within a parent pattern, the child w (array . (identifier) @the-element) ``` -Were this anchor operator elided, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier. +Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier. Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`. From 1749a5d672673cda78e6dd02baac7bec76eee335 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Fri, 23 Oct 2020 15:11:53 -0400 Subject: [PATCH 217/282] Shorter wording. --- docs/section-2-using-parsers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 7166c77695..75c508f589 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -586,7 +586,7 @@ For example, this pattern would match any node inside a call: ``` -#### Anchor Nodes +#### Anchors The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query. 
From d533d1f07654b7011c6c914537df3d1e498b8193 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 22 Oct 2020 16:21:47 -0700 Subject: [PATCH 218/282] Allocate parent nodes together with their child array --- lib/src/array.h | 11 +- lib/src/get_changed_ranges.c | 4 +- lib/src/node.c | 4 +- lib/src/parser.c | 110 ++++++++-------- lib/src/reusable_node.h | 4 +- lib/src/stack.c | 5 +- lib/src/subtree.c | 234 ++++++++++++++++++++++------------- lib/src/subtree.h | 19 ++- lib/src/tree_cursor.c | 6 +- 9 files changed, 240 insertions(+), 157 deletions(-) diff --git a/lib/src/array.h b/lib/src/array.h index de8c8cb381..13117194d9 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -74,6 +74,9 @@ extern "C" { #define array_assign(self, other) \ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) +#define array_swap(self, other) \ + array__swap((VoidArray *)(self), (VoidArray *)(other)) + // Search a sorted array for a given `needle` value, using the given `compare` // callback to determine the order. 
// @@ -139,7 +142,7 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t if (self->contents) { self->contents = ts_realloc(self->contents, new_capacity * element_size); } else { - self->contents = ts_calloc(new_capacity, element_size); + self->contents = ts_malloc(new_capacity * element_size); } self->capacity = new_capacity; } @@ -151,6 +154,12 @@ static inline void array__assign(VoidArray *self, const VoidArray *other, size_t memcpy(self->contents, other->contents, self->size * element_size); } +static inline void array__swap(VoidArray *self, VoidArray *other) { + VoidArray swap = *other; + *other = *self; + *self = swap; +} + static inline void array__grow(VoidArray *self, size_t count, size_t element_size) { size_t new_size = self->size + count; if (new_size > self->capacity) { diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index b24f314949..b8915544f0 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -205,7 +205,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { Length position = entry.position; uint32_t structural_child_index = 0; for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - const Subtree *child = &entry.subtree->ptr->children[i]; + const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); Length child_right = length_add(child_left, ts_subtree_size(*child)); @@ -260,7 +260,7 @@ static void iterator_advance(Iterator *self) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); uint32_t structural_child_index = entry.structural_child_index; if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - const Subtree *next_child = &parent->ptr->children[child_index]; + const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = 
next_child, diff --git a/lib/src/node.c b/lib/src/node.c index 576f3ef38e..9ce0f0b3b3 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -79,7 +79,7 @@ static inline bool ts_node_child_iterator_next( TSNode *result ) { if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; - const Subtree *child = &self->parent.ptr->children[self->child_index]; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; TSSymbol alias_symbol = 0; if (!ts_subtree_extra(*child)) { if (self->alias_sequence) { @@ -178,7 +178,7 @@ static bool ts_subtree_has_trailing_empty_descendant( Subtree other ) { for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { - Subtree child = self.ptr->children[i]; + Subtree child = ts_subtree_children(self)[i]; if (ts_subtree_total_bytes(child) > 0) break; if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { return true; diff --git a/lib/src/parser.c b/lib/src/parser.c index b88f84e42e..3984d0021b 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -60,8 +60,9 @@ struct TSParser { const TSLanguage *language; ReduceActionSet reduce_actions; Subtree finished_tree; - SubtreeHeapData scratch_tree_data; - MutableSubtree scratch_tree; + SubtreeArray trailing_extras; + SubtreeArray trailing_extras2; + SubtreeArray scratch_trees; TokenCache token_cache; ReusableNode reusable_node; void *external_scanner_payload; @@ -155,7 +156,7 @@ static bool ts_parser__breakdown_top_of_stack( Subtree parent = *array_front(&slice.subtrees); for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { - Subtree child = parent.ptr->children[j]; + Subtree child = ts_subtree_children(parent)[j]; pending = ts_subtree_child_count(child) > 0; if (ts_subtree_is_error(child)) { @@ -717,6 +718,25 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } } +static bool ts_parser__select_children( + TSParser *self, + Subtree left, + const SubtreeArray *children 
+) { + array_assign(&self->scratch_trees, children); + MutableSubtree scratch_tree = ts_subtree_new_node( + ts_subtree_symbol(left), + &self->scratch_trees, + 0, + self->language + ); + return ts_parser__select_tree( + self, + left, + ts_subtree_from_mut(scratch_tree) + ); +} + static void ts_parser__shift( TSParser *self, StackVersion version, @@ -742,22 +762,6 @@ static void ts_parser__shift( } } -static bool ts_parser__replace_children( - TSParser *self, - MutableSubtree *tree, - SubtreeArray *children -) { - *self->scratch_tree.ptr = *tree->ptr; - self->scratch_tree.ptr->child_count = 0; - ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language); - if (ts_parser__select_tree(self, ts_subtree_from_mut(*tree), ts_subtree_from_mut(self->scratch_tree))) { - *tree->ptr = *self->scratch_tree.ptr; - return true; - } else { - return false; - } -} - static StackVersion ts_parser__reduce( TSParser *self, StackVersion version, @@ -802,11 +806,9 @@ static StackVersion ts_parser__reduce( // node. They will be re-pushed onto the stack after the parent node is // created and pushed. 
SubtreeArray children = slice.subtrees; - while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) { - children.size--; - } + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, + MutableSubtree parent = ts_subtree_new_node( symbol, &children, production_id, self->language ); @@ -820,14 +822,21 @@ static StackVersion ts_parser__reduce( i++; SubtreeArray children = next_slice.subtrees; - while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) { - children.size--; - } + ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2); - if (ts_parser__replace_children(self, &parent, &children)) { - ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - slice = next_slice; + if (ts_parser__select_children( + self, + ts_subtree_from_mut(parent), + &children + )) { + ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); + ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); + array_swap(&self->trailing_extras, &self->trailing_extras2); + parent = ts_subtree_new_node( + symbol, &children, production_id, self->language + ); } else { + array_clear(&self->trailing_extras2); ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); } } @@ -851,8 +860,8 @@ static StackVersion ts_parser__reduce( // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. 
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); - for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) { - ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state); + for (uint32_t j = 0; j < self->trailing_extras.size; j++) { + ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); } for (StackVersion j = 0; j < slice_version; j++) { @@ -884,22 +893,22 @@ static void ts_parser__accept( Subtree root = NULL_SUBTREE; for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - Subtree child = trees.contents[j]; - if (!ts_subtree_extra(child)) { - assert(!child.data.is_inline); - uint32_t child_count = ts_subtree_child_count(child); + Subtree tree = trees.contents[j]; + if (!ts_subtree_extra(tree)) { + assert(!tree.data.is_inline); + uint32_t child_count = ts_subtree_child_count(tree); + const Subtree *children = ts_subtree_children(tree); for (uint32_t k = 0; k < child_count; k++) { - ts_subtree_retain(child.ptr->children[k]); + ts_subtree_retain(children[k]); } - array_splice(&trees, j, 1, child_count, child.ptr->children); + array_splice(&trees, j, 1, child_count, children); root = ts_subtree_from_mut(ts_subtree_new_node( - &self->tree_pool, - ts_subtree_symbol(child), + ts_subtree_symbol(tree), &trees, - child.ptr->production_id, + tree.ptr->production_id, self->language )); - ts_subtree_release(&self->tree_pool, child); + ts_subtree_release(&self->tree_pool, tree); break; } } @@ -1125,7 +1134,7 @@ static bool ts_parser__recover_to_state( Subtree error_tree = error_trees.contents[0]; uint32_t error_child_count = ts_subtree_child_count(error_tree); if (error_child_count > 0) { - array_splice(&slice.subtrees, 0, 0, error_child_count, error_tree.ptr->children); + array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); for (unsigned j = 0; j < error_child_count; j++) { 
ts_subtree_retain(slice.subtrees.contents[j]); } @@ -1133,22 +1142,21 @@ static bool ts_parser__recover_to_state( ts_subtree_array_delete(&self->tree_pool, &error_trees); } - SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees); + ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); if (slice.subtrees.size > 0) { - Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language); + Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); ts_stack_push(self->stack, slice.version, error, false, goal_state); } else { array_delete(&slice.subtrees); } - for (unsigned j = 0; j < trailing_extras.size; j++) { - Subtree tree = trailing_extras.contents[j]; + for (unsigned j = 0; j < self->trailing_extras.size; j++) { + Subtree tree = self->trailing_extras.contents[j]; ts_stack_push(self->stack, slice.version, tree, false, goal_state); } previous_version = slice.version; - array_delete(&trailing_extras); } return previous_version != STACK_VERSION_NONE; @@ -1245,7 +1253,7 @@ static void ts_parser__recover( if (ts_subtree_is_eof(lookahead)) { LOG("recover_eof"); SubtreeArray children = array_new(); - Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language); + Subtree parent = ts_subtree_new_error_node(&children, false, self->language); ts_stack_push(self->stack, version, parent, false, 1); ts_parser__accept(self, version, lookahead); return; @@ -1278,7 +1286,6 @@ static void ts_parser__recover( array_reserve(&children, 1); array_push(&children, lookahead); MutableSubtree error_repeat = ts_subtree_new_node( - &self->tree_pool, ts_builtin_sym_error_repeat, &children, 0, @@ -1307,7 +1314,6 @@ static void ts_parser__recover( ts_stack_renumber_version(self->stack, pop.contents[0].version, version); array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat)); error_repeat = ts_subtree_new_node( - 
&self->tree_pool, ts_builtin_sym_error_repeat, &pop.contents[0].subtrees, 0, @@ -1666,7 +1672,6 @@ TSParser *ts_parser_new(void) { self->end_clock = clock_null(); self->operation_count = 0; self->old_tree = NULL_SUBTREE; - self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); self->included_range_difference_index = 0; ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); @@ -1692,6 +1697,9 @@ void ts_parser_delete(TSParser *self) { ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); ts_subtree_pool_delete(&self->tree_pool); reusable_node_delete(&self->reusable_node); + array_delete(&self->trailing_extras); + array_delete(&self->trailing_extras2); + array_delete(&self->scratch_trees); ts_free(self); } diff --git a/lib/src/reusable_node.h b/lib/src/reusable_node.h index e5ccaa2a28..63fe3c1a36 100644 --- a/lib/src/reusable_node.h +++ b/lib/src/reusable_node.h @@ -53,7 +53,7 @@ static inline void reusable_node_advance(ReusableNode *self) { } while (ts_subtree_child_count(tree) <= next_index); array_push(&self->stack, ((StackEntry) { - .tree = tree.ptr->children[next_index], + .tree = ts_subtree_children(tree)[next_index], .child_index = next_index, .byte_offset = byte_offset, })); @@ -63,7 +63,7 @@ static inline bool reusable_node_descend(ReusableNode *self) { StackEntry last_entry = *array_back(&self->stack); if (ts_subtree_child_count(last_entry.tree) > 0) { array_push(&self->stack, ((StackEntry) { - .tree = last_entry.tree.ptr->children[0], + .tree = ts_subtree_children(last_entry.tree)[0], .child_index = 0, .byte_offset = last_entry.byte_offset, })); diff --git a/lib/src/stack.c b/lib/src/stack.c index 6a8d897c37..cc728b05b0 100644 --- a/lib/src/stack.c +++ b/lib/src/stack.c @@ -288,7 +288,7 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, bool include_subtrees = false; if (goal_subtree_count >= 0) { include_subtrees = true; - 
array_reserve(&iterator.subtrees, goal_subtree_count); + array_reserve(&iterator.subtrees, ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); } array_push(&self->iterators, iterator); @@ -304,8 +304,9 @@ inline StackSliceArray stack__iter(Stack *self, StackVersion version, if (should_pop) { SubtreeArray subtrees = iterator->subtrees; - if (!should_stop) + if (!should_stop) { ts_subtree_array_copy(subtrees, &subtrees); + } ts_subtree_array_reverse(&subtrees); ts_stack__add_slice( self, diff --git a/lib/src/subtree.c b/lib/src/subtree.c index a72d2ec2fd..4d3986ec38 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -80,26 +80,33 @@ void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { } } -void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { +void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { for (uint32_t i = 0; i < self->size; i++) { ts_subtree_release(pool, self->contents[i]); } - array_delete(self); + array_clear(self); } -SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) { - SubtreeArray result = array_new(); +void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { + ts_subtree_array_clear(pool, self); + array_delete(self); +} - uint32_t i = self->size - 1; - for (; i + 1 > 0; i--) { - Subtree child = self->contents[i]; - if (!ts_subtree_extra(child)) break; - array_push(&result, child); +void ts_subtree_array_remove_trailing_extras( + SubtreeArray *self, + SubtreeArray *destination +) { + array_clear(destination); + while (self->size > 0) { + Subtree last = self->contents[self->size - 1]; + if (ts_subtree_extra(last)) { + self->size--; + array_push(destination, last); + } else { + break; + } } - - self->size = i + 1; - ts_subtree_array_reverse(&result); - return result; + ts_subtree_array_reverse(destination); } void ts_subtree_array_reverse(SubtreeArray *self) { @@ -247,28 +254,49 @@ Subtree ts_subtree_new_error( return result; } -MutableSubtree 
ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - if (self.data.is_inline) return (MutableSubtree) {self.data}; - if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - - SubtreeHeapData *result = ts_subtree_pool_allocate(pool); - memcpy(result, self.ptr, sizeof(SubtreeHeapData)); - if (result->child_count > 0) { - result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree)); - memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree)); - for (uint32_t i = 0; i < result->child_count; i++) { - ts_subtree_retain(result->children[i]); +// Get the size needed to store a heap-allocated subtree with +// the given number o children. +// Clone a subtree. +// +// This will reuse the given allocated buffer if it is present. If the +// buffer is NULL, a new allocation will be created. +MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { + size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); + Subtree *children = ts_realloc(buffer_to_reuse, alloc_size); + memcpy(children, ts_subtree_children(self), alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; + if (self.ptr->child_count > 0) { + for (uint32_t i = 0; i < self.ptr->child_count; i++) { + ts_subtree_retain(children[i]); } - } else if (result->has_external_tokens) { - result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state); + } else if (self.ptr->has_external_tokens) { + result->external_scanner_state = ts_external_scanner_state_copy( + &self.ptr->external_scanner_state + ); } result->ref_count = 1; - ts_subtree_release(pool, self); return (MutableSubtree) {.ptr = result}; } -static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language, - MutableSubtreeArray *stack) { +// Get mutable version of a subtree. +// +// This takes ownership of the subtree. 
If the subtree has only one owner, +// this will directly convert it into a mutable version. Otherwise, it will +// perform a copy. +MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { + if (self.data.is_inline) return (MutableSubtree) {self.data}; + if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); + MutableSubtree result = ts_subtree_clone(self, NULL); + ts_subtree_release(pool, self); + return result; +} + +static void ts_subtree__compress( + MutableSubtree self, + unsigned count, + const TSLanguage *language, + MutableSubtreeArray *stack +) { unsigned initial_stack_size = stack->size; MutableSubtree tree = self; @@ -276,7 +304,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa for (unsigned i = 0; i < count; i++) { if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); if ( child.data.is_inline || child.ptr->child_count < 2 || @@ -284,7 +312,7 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa child.ptr->symbol != symbol ) break; - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); if ( grandchild.data.is_inline || grandchild.ptr->child_count < 2 || @@ -292,20 +320,20 @@ static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLa grandchild.ptr->symbol != symbol ) break; - tree.ptr->children[0] = ts_subtree_from_mut(grandchild); - child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1]; - grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); + ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); + ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; 
+ ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); array_push(stack, tree); tree = grandchild; } while (stack->size > initial_stack_size) { tree = array_pop(stack); - MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]); - MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]); - ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language); - ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language); - ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language); + MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); + MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); + ts_subtree_summarize_children(grandchild, language); + ts_subtree_summarize_children(child, language); + ts_subtree_summarize_children(tree, language); } } @@ -320,8 +348,8 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu MutableSubtree tree = array_pop(&pool->tree_stack); if (tree.ptr->repeat_depth > 0) { - Subtree child1 = tree.ptr->children[0]; - Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1]; + Subtree child1 = ts_subtree_children(tree)[0]; + Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); if (repeat_delta > 0) { unsigned n = repeat_delta; @@ -333,7 +361,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu } for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = tree.ptr->children[i]; + Subtree child = ts_subtree_children(tree)[i]; if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); } @@ -341,17 +369,13 @@ void 
ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu } } -void ts_subtree_set_children( - MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language +// Assign all of the node's properties that depend on its children. +void ts_subtree_summarize_children( + MutableSubtree self, + const TSLanguage *language ) { assert(!self.data.is_inline); - if (self.ptr->child_count > 0 && children != self.ptr->children) { - ts_free(self.ptr->children); - } - - self.ptr->child_count = child_count; - self.ptr->children = children; self.ptr->named_child_count = 0; self.ptr->visible_child_count = 0; self.ptr->error_cost = 0; @@ -364,8 +388,9 @@ void ts_subtree_set_children( const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; + const Subtree *children = ts_subtree_children(self); for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; + Subtree child = children[i]; if (i == 0) { self.ptr->padding = ts_subtree_padding(child); @@ -384,6 +409,17 @@ void ts_subtree_set_children( self.ptr->error_cost += ts_subtree_error_cost(child); } + uint32_t grandchild_count = ts_subtree_child_count(child); + if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) { + if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { + if (ts_subtree_visible(child)) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; + } else if (grandchild_count > 0) { + self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; + } + } + } + self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); self.ptr->node_count += ts_subtree_node_count(child); @@ -395,7 +431,7 @@ void ts_subtree_set_children( } else if (ts_subtree_visible(child)) { self.ptr->visible_child_count++; if (ts_subtree_named(child)) self.ptr->named_child_count++; - } else if 
(ts_subtree_child_count(child) > 0) { + } else if (grandchild_count > 0) { self.ptr->visible_child_count += child.ptr->visible_child_count; self.ptr->named_child_count += child.ptr->named_child_count; } @@ -417,22 +453,11 @@ void ts_subtree_set_children( ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; - uint32_t grandchild_count = ts_subtree_child_count(child); - if (ts_subtree_extra(child)) continue; - if (ts_subtree_is_error(child) && grandchild_count == 0) continue; - if (ts_subtree_visible(child)) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - } else if (grandchild_count > 0) { - self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; - } - } } if (self.ptr->child_count > 0) { - Subtree first_child = self.ptr->children[0]; - Subtree last_child = self.ptr->children[self.ptr->child_count - 1]; + Subtree first_child = children[0]; + Subtree last_child = children[self.ptr->child_count - 1]; self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); @@ -455,15 +480,30 @@ void ts_subtree_set_children( } } -MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned production_id, - const TSLanguage *language) { +// Create a new parent node with the given children. +// +// This takes ownership of the children array. 
+MutableSubtree ts_subtree_new_node( + TSSymbol symbol, + SubtreeArray *children, + unsigned production_id, + const TSLanguage *language +) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - SubtreeHeapData *data = ts_subtree_pool_allocate(pool); + + // Allocate the node's data at the end of the array of children. + size_t new_byte_size = ts_subtree_alloc_size(children->size); + if (children->capacity * sizeof(Subtree) < new_byte_size) { + children->contents = ts_realloc(children->contents, new_byte_size); + children->capacity = new_byte_size / sizeof(Subtree); + } + SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; + *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, + .child_count = children->size, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -477,32 +517,45 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, }} }; MutableSubtree result = {.ptr = data}; - ts_subtree_set_children(result, children->contents, children->size, language); + ts_subtree_summarize_children(result, language); return result; } -Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children, - bool extra, const TSLanguage *language) { +// Create a new error node contaning the given children. +// +// This node is treated as 'extra'. Its children are prevented from having +// having any effect on the parse state. 
+Subtree ts_subtree_new_error_node( + SubtreeArray *children, + bool extra, + const TSLanguage *language +) { MutableSubtree result = ts_subtree_new_node( - pool, ts_builtin_sym_error, children, 0, language + ts_builtin_sym_error, children, 0, language ); result.ptr->extra = extra; return ts_subtree_from_mut(result); } -Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, - const TSLanguage *language) { +// Create a new 'missing leaf' node. +// +// This node is treated as 'extra'. Its children are prevented from having +// having any effect on the parse state. +Subtree ts_subtree_new_missing_leaf( + SubtreePool *pool, + TSSymbol symbol, + Length padding, + const TSLanguage *language +) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), 0, 0, false, false, language ); - if (result.data.is_inline) { result.data.is_missing = true; } else { ((SubtreeHeapData *)result.ptr)->is_missing = true; } - return result; } @@ -525,19 +578,22 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) { while (pool->tree_stack.size > 0) { MutableSubtree tree = array_pop(&pool->tree_stack); if (tree.ptr->child_count > 0) { + Subtree *children = ts_subtree_children(tree); for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - Subtree child = tree.ptr->children[i]; + Subtree child = children[i]; if (child.data.is_inline) continue; assert(child.ptr->ref_count > 0); if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); } } - ts_free(tree.ptr->children); - } else if (tree.ptr->has_external_tokens) { - ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + ts_free(children); + } else { + if (tree.ptr->has_external_tokens) { + ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); + } + ts_subtree_pool_free(pool, tree.ptr); } - ts_subtree_pool_free(pool, tree.ptr); } } @@ -564,7 +620,7 @@ bool ts_subtree_eq(Subtree self, 
Subtree other) { if (self.ptr->named_child_count != other.ptr->named_child_count) return false; for (uint32_t i = 0; i < self.ptr->child_count; i++) { - if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) { + if (!ts_subtree_eq(ts_subtree_children(self)[i], ts_subtree_children(other)[i])) { return false; } } @@ -578,8 +634,8 @@ int ts_subtree_compare(Subtree left, Subtree right) { if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1; if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1; for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) { - Subtree left_child = left.ptr->children[i]; - Subtree right_child = right.ptr->children[i]; + Subtree left_child = ts_subtree_children(left)[i]; + Subtree right_child = ts_subtree_children(right)[i]; switch (ts_subtree_compare(left_child, right_child)) { case -1: return -1; case 1: return 1; @@ -695,7 +751,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool Length child_left, child_right = length_zero(); for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { - Subtree *child = &result.ptr->children[i]; + Subtree *child = &ts_subtree_children(*entry.tree)[i]; Length child_size = ts_subtree_total_size(*child); child_left = child_right; child_right = length_add(child_left, child_size); @@ -750,7 +806,7 @@ Subtree ts_subtree_last_external_token(Subtree tree) { if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; while (tree.ptr->child_count > 0) { for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { - Subtree child = tree.ptr->children[i]; + Subtree child = ts_subtree_children(tree)[i]; if (ts_subtree_has_external_tokens(child)) { tree = child; break; @@ -853,7 +909,7 @@ static size_t ts_subtree__write_to_string( uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { - Subtree child = self.ptr->children[i]; + Subtree child = 
ts_subtree_children(self)[i]; if (ts_subtree_extra(child)) { cursor += ts_subtree__write_to_string( child, *writer, limit, @@ -950,7 +1006,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, language->max_alias_sequence_length * ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { - const Subtree *child = &self->ptr->children[i]; + const Subtree *child = &ts_subtree_children(*self)[i]; TSSymbol alias_symbol = 0; if (!ts_subtree_extra(*child) && child_info_offset) { alias_symbol = language->alias_sequences[child_info_offset]; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 18c48dcbd0..7df8b09acc 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -68,7 +68,6 @@ typedef struct { union { // Non-terminal subtrees (`child_count > 0`) struct { - Subtree *children; uint32_t visible_child_count; uint32_t named_child_count; uint32_t node_count; @@ -111,8 +110,9 @@ void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsign const char *ts_external_scanner_state_data(const ExternalScannerState *); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); +void ts_subtree_array_clear(SubtreePool *, SubtreeArray *); void ts_subtree_array_delete(SubtreePool *, SubtreeArray *); -SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *); +void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *); SubtreePool ts_subtree_pool_new(uint32_t capacity); @@ -125,8 +125,8 @@ Subtree ts_subtree_new_leaf( Subtree ts_subtree_new_error( SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * ); -MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *); -Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *); +MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage 
*); +Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *); Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); @@ -134,13 +134,15 @@ void ts_subtree_release(SubtreePool *, Subtree); bool ts_subtree_eq(Subtree, Subtree); int ts_subtree_compare(Subtree, Subtree); void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); -void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *); +void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); +void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); +MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse); #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name) @@ -156,6 +158,10 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET +static inline size_t ts_subtree_alloc_size(uint32_t child_count) { + return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); +} + static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; @@ -202,6 +208,9 @@ static inline uint32_t ts_subtree_total_bytes(Subtree self) { return ts_subtree_total_size(self).bytes; } +#define ts_subtree_children(self) \ + ((self).data.is_inline ? 
NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) + static inline uint32_t ts_subtree_child_count(Subtree self) { return self.data.is_inline ? 0 : self.ptr->child_count; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index f109524e87..8af44a343b 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -38,7 +38,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, TreeCursorEntry *result, bool *visible) { if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - const Subtree *child = &self->parent.ptr->children[self->child_index]; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; *result = (TreeCursorEntry) { .subtree = child, .position = self->position, @@ -56,7 +56,7 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, self->child_index++; if (self->child_index < self->parent.ptr->child_count) { - Subtree next_child = self->parent.ptr->children[self->child_index]; + Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; self->position = length_add(self->position, ts_subtree_padding(next_child)); } @@ -306,7 +306,7 @@ void ts_tree_cursor_current_status( unsigned structural_child_index = entry->structural_child_index; if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - Subtree sibling = parent_entry->subtree->ptr->children[j]; + Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( self->tree->language, subtree_symbol(sibling, structural_child_index) From 908b102786f4d5a6c40e63233b59ca5be3e705ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 11:58:27 -0700 Subject: [PATCH 219/282] Add more doc comments in the C lib --- lib/src/parser.c | 16 +++++++++++++--- lib/src/subtree.c | 11 +++-------- lib/src/subtree.h 
| 45 +++++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 3984d0021b..0c711b0ce3 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -673,6 +673,10 @@ static Subtree ts_parser__reuse_node( return NULL_SUBTREE; } +// Determine if a given tree should be replaced by an alternative tree. +// +// The decision is based on the trees' error costs (if any), their dynamic precedence, +// and finally, as a default, by a recursive comparison of the trees' symbols. static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { if (!left.ptr) return true; if (!right.ptr) return false; @@ -718,18 +722,26 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } } +// Determine if a given tree's children should be replaced by an alternative +// array of children. static bool ts_parser__select_children( TSParser *self, Subtree left, const SubtreeArray *children ) { array_assign(&self->scratch_trees, children); + + // Create a temporary subtree using the scratch trees array. This node does + // not perform any allocation except for possibly growing the array to make + // room for its own heap data. The scratch tree is never explicitly released, + // so the same 'scratch trees' array can be reused again later. 
MutableSubtree scratch_tree = ts_subtree_new_node( ts_subtree_symbol(left), &self->scratch_trees, 0, self->language ); + return ts_parser__select_tree( self, left, @@ -841,9 +853,6 @@ static StackVersion ts_parser__reduce( } } - parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->production_id = production_id; - TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); if (end_of_non_terminal_extra && next_state == state) { @@ -856,6 +865,7 @@ static StackVersion ts_parser__reduce( } else { parent.ptr->parse_state = state; } + parent.ptr->dynamic_precedence += dynamic_precedence; // Push the parent node onto the stack, along with any extra tokens that // were previously on top of the stack. diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 4d3986ec38..fc1db617d6 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -254,15 +254,10 @@ Subtree ts_subtree_new_error( return result; } -// Get the size needed to store a heap-allocated subtree with -// the given number o children. // Clone a subtree. -// -// This will reuse the given allocated buffer if it is present. If the -// buffer is NULL, a new allocation will be created. 
-MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { +MutableSubtree ts_subtree_clone(Subtree self) { size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *children = ts_realloc(buffer_to_reuse, alloc_size); + Subtree *children = ts_malloc(alloc_size); memcpy(children, ts_subtree_children(self), alloc_size); SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; if (self.ptr->child_count > 0) { @@ -286,7 +281,7 @@ MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse) { MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { if (self.data.is_inline) return (MutableSubtree) {self.data}; if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - MutableSubtree result = ts_subtree_clone(self, NULL); + MutableSubtree result = ts_subtree_clone(self); ts_subtree_release(pool, self); return result; } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 7df8b09acc..b020deb61a 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -14,12 +14,19 @@ extern "C" { #include "tree_sitter/api.h" #include "tree_sitter/parser.h" -static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX; +#define TS_TREE_STATE_NONE USHRT_MAX #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) -typedef union Subtree Subtree; -typedef union MutableSubtree MutableSubtree; - +// The serialized state of an external scanner. +// +// Every time an external token subtree is created after a call to an +// external scanner, the scanner's `serialize` function is called to +// retrieve a serialized copy of its state. The bytes are then copied +// onto the subtree itself so that the scanner's state can later be +// restored using its `deserialize` function. +// +// Small byte arrays are stored inline, and long ones are allocated +// separately on the heap. 
typedef struct { union { char *long_data; @@ -28,6 +35,10 @@ typedef struct { uint32_t length; } ExternalScannerState; +// A compact representation of a subtree. +// +// This representation is used for small leaf nodes that are not +// errors, and were not created by an external scanner. typedef struct { bool is_inline : 1; bool visible : 1; @@ -45,6 +56,11 @@ typedef struct { uint16_t parse_state; } SubtreeInlineData; +// A heap-allocated representation of a subtree. +// +// This representation is used for parent nodes, external tokens, +// errors, and other leaf nodes whose data is too large to fit into +// the inlinen representation. typedef struct { volatile uint32_t ref_count; Length padding; @@ -88,15 +104,17 @@ typedef struct { }; } SubtreeHeapData; -union Subtree { +// The fundamental building block of a syntax tree. +typedef union { SubtreeInlineData data; const SubtreeHeapData *ptr; -}; +} Subtree; -union MutableSubtree { +// Like Subtree, but mutable. +typedef union { SubtreeInlineData data; SubtreeHeapData *ptr; -}; +} MutableSubtree; typedef Array(Subtree) SubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray; @@ -142,7 +160,6 @@ char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); -MutableSubtree ts_subtree_clone(Subtree self, Subtree *buffer_to_reuse); #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name) @@ -158,10 +175,17 @@ static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE #undef SUBTREE_GET +// Get the size needed to store a heap-allocated subtree with the given +// number of children. 
static inline size_t ts_subtree_alloc_size(uint32_t child_count) { return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); } +// Get a subtree's children, which are allocated immediately before the +// tree's own heap data. +#define ts_subtree_children(self) \ + ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) + static inline void ts_subtree_set_extra(MutableSubtree *self) { if (self->data.is_inline) { self->data.extra = true; @@ -208,9 +232,6 @@ static inline uint32_t ts_subtree_total_bytes(Subtree self) { return ts_subtree_total_size(self).bytes; } -#define ts_subtree_children(self) \ - ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) - static inline uint32_t ts_subtree_child_count(Subtree self) { return self.data.is_inline ? 0 : self.ptr->child_count; } From b972a7158d61631208b3a7aaa94a08aa2132df83 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 22 Oct 2020 15:55:51 -0700 Subject: [PATCH 220/282] Tweak cancellation logic for CLI commands In 'parse' and 'highlight' cancel on stdin if stdin is a tty. 
--- Cargo.lock | 599 +++++++++++++++++++++---------------------- cli/Cargo.toml | 1 + cli/src/highlight.rs | 28 +- cli/src/main.rs | 38 ++- cli/src/parse.rs | 17 +- cli/src/util.rs | 18 +- 6 files changed, 358 insertions(+), 343 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a85e9dada1..ea918eb687 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,989 +4,988 @@ name = "aho-corasick" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" dependencies = [ - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "arrayref" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" [[package]] name = "arrayvec" version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" dependencies = [ - "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop", ] [[package]] name = "ascii" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" [[package]] name = "atty" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - 
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "termion", + "winapi", ] [[package]] name = "autocfg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" [[package]] name = "backtrace" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" dependencies = [ - "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys", + "cfg-if", + "libc", + "rustc-demangle", + "winapi", ] [[package]] name = "backtrace-sys" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "libc", ] [[package]] name = "base64" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder", ] [[package]] name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" [[package]] name = "blake2b_simd" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" dependencies = [ - "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayref", + "arrayvec", + "constant_time_eq", ] [[package]] name = "byteorder" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" [[package]] name = "c2-chacha" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", + "ppv-lite86", ] [[package]] name = "cc" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" [[package]] name = "cfg-if" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" [[package]] name = "chrono" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer", + "num-traits", + "time", ] [[package]] name = "chunked_transfer" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" [[package]] name = "clap" version = "2.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", ] [[package]] name = "cloudabi" version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", ] [[package]] name = "constant_time_eq" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" [[package]] name = "crossbeam-utils" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "lazy_static", ] [[package]] name = "difference" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "dirs-sys", ] [[package]] name = "dirs-sys" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "redox_users", + "winapi", ] [[package]] name = "failure" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" dependencies = [ - "backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace", + "failure_derive", ] [[package]] name = "failure_derive" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "synstructure", ] 
[[package]] name = "fuchsia-zircon" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", + "fuchsia-zircon-sys", ] [[package]] name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] name = "getrandom" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", ] [[package]] name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "idna" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", + "unicode-bidi", + "unicode-normalization", ] [[package]] name = "indexmap" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" [[package]] name = "itoa" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" [[package]] name = "lazy_static" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" [[package]] name = "libc" version = "0.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" [[package]] name = "libloading" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "winapi", ] [[package]] name = "lock_api" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" dependencies = [ - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard", ] [[package]] name = "log" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] name = "memchr" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" [[package]] name = "num-integer" version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" dependencies = [ - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits", ] [[package]] name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" [[package]] name = "once_cell" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" dependencies = [ - "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot", ] [[package]] name = "parking_lot" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" dependencies = [ - "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", + "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "rand 0.6.4", + "rustc_version", + "smallvec", + "winapi", ] [[package]] name = "percent-encoding" version 
= "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" [[package]] name = "ppv-lite86" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" [[package]] name = "proc-macro2" version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid", ] [[package]] name = "quote" version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.3.0", 
+ "rand_hc 0.1.0", + "rand_isaac", + "rand_os", + "rand_pcg", + "rand_xorshift", + "winapi", ] [[package]] name = "rand" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha 0.2.1", + "rand_core 0.5.0", + "rand_hc 0.2.0", ] [[package]] name = "rand_chacha" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "rand_core 0.3.0", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core 0.5.0", ] [[package]] name = "rand_core" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" [[package]] name = "rand_core" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" dependencies = [ - "getrandom 0.1.8 
(registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.0", ] [[package]] name = "rand_isaac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_os" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cloudabi", + "fuchsia-zircon", + "libc", + "rand_core 0.3.0", + "rdrand", + "winapi", ] [[package]] name = "rand_pcg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" dependencies = [ - "rand_core 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", + "rustc_version", ] [[package]] name = "rand_xorshift" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rdrand" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "redox_syscall" version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall", ] [[package]] name = "redox_users" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" dependencies = [ - "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure", + "rand_os", + "redox_syscall", + "rust-argon2", ] [[package]] name = "regex" version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" dependencies = [ - "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", + "utf8-ranges", ] [[package]] name = "regex-syntax" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ucd-util", ] [[package]] name = "remove_dir_all" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "rust-argon2" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" dependencies = [ - "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", - "blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "base64", + "blake2b_simd", + "crossbeam-utils", ] [[package]] name = "rustc-demangle" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" [[package]] name = "rustc_version" version = "0.2.3" source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" dependencies = [ - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver", ] [[package]] name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" [[package]] name = "scopeguard" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver-parser", ] [[package]] name = "semver-parser" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" [[package]] name = "serde_derive" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "serde_json" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" 
dependencies = [ - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap", + "itoa", + "ryu", + "serde", ] [[package]] name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" [[package]] name = "smallvec" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable", ] [[package]] name = "spin" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" [[package]] name = "strsim" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" [[package]] name = "syn" version = "0.15.22" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] name = "synstructure" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" dependencies = [ - "proc-macro2 0.4.24 
(registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "unicode-xid", ] [[package]] name = "tempfile" version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand 0.6.4", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termion" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "redox_termios", ] [[package]] name = "textwrap" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" dependencies = [ - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width", ] [[package]] name = "thread_local" version = "0.3.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] name = "time" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "winapi", ] [[package]] name = "tiny_http" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" dependencies = [ - "ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)", - "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "ascii", + "chrono", + "chunked_transfer", + "log", + "url", ] [[package]] name = "tree-sitter" version = "0.17.0" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "regex", ] [[package]] name = "tree-sitter-cli" version = "0.17.1" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs 2.0.2 
(registry+https://github.com/rust-lang/crates.io-index)", - "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.17.0", - "tree-sitter-highlight 0.3.0", - "tree-sitter-tags 0.3.0", - "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "cc", + "clap", + "difference", + "dirs", + "glob", + "lazy_static", + "libloading", + "log", + "once_cell", + "rand 0.7.0", + "regex", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "smallbitvec", + "spin", + "tempfile", + "tiny_http", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-tags", + "webbrowser", ] [[package]] name = "tree-sitter-highlight" version = "0.3.0" dependencies = [ - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.17.0", + "regex", + "tree-sitter", ] [[package]] name = "tree-sitter-tags" version = "0.3.0" 
dependencies = [ - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.17.0", + "memchr", + "regex", + "tree-sitter", ] [[package]] name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" [[package]] name = "unicode-bidi" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", ] [[package]] name = "unicode-normalization" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" dependencies = [ - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec", ] [[package]] name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" [[package]] name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unreachable" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "void", ] [[package]] name = "url" version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" dependencies = [ - "idna 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "idna", + "matches", + "percent-encoding", ] [[package]] name = "utf8-ranges" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" [[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" [[package]] name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "webbrowser" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" dependencies = [ - "widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "widestring", + "winapi", ] [[package]] name = "widestring" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" [[package]] name = "winapi" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" -"checksum arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" -"checksum ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" -"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" -"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" -"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" -"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" -"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" -"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum blake2b_simd 0.5.6 
(registry+https://github.com/rust-lang/crates.io-index)" = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" -"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" -"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" -"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" -"checksum chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" -"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" -"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" -"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" -"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" -"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" -"checksum dirs-sys 0.3.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" -"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" -"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" -"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" -"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" -"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" -"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" -"checksum lock_api 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" -"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" -"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" -"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" -"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" -"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" -"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" -"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" -"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" -"checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" -"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" -"checksum quote 0.6.10 
(registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" -"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" -"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" -"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" -"checksum rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" -"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" -"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" -"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -"checksum rdrand 0.4.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" -"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" -"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" -"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" -"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" -"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" -"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" -"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" -"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -"checksum 
semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" -"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" -"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" -"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" -"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" -"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" -"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" -"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" -"checksum tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" -"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" -"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" -"checksum thread_local 
0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" -"checksum tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" -"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" -"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum webbrowser 
0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" -"checksum widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" -"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c3d183e1ef..21a8fa0fc4 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -21,6 +21,7 @@ harness = false [dependencies] ansi_term = "0.11" cc = "1.0" +atty = "0.2" clap = "2.32" difference = "2.0" dirs = "2.0.2" diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index c6b1193d65..330c9e57d3 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -7,6 +7,7 @@ use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::sync::atomic::AtomicUsize; use std::time::Instant; use std::{fs, io, path, str, usize}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; @@ -278,14 +279,14 @@ pub fn ansi( source: &[u8], config: &HighlightConfiguration, print_time: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); let time = Instant::now(); - let cancellation_flag = util::cancel_on_stdin(); let mut highlighter = Highlighter::new(); - let events = 
highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + let events = highlighter.highlight(config, source, cancellation_flag, |string| { loader.highlight_config_for_injection_string(string) })?; @@ -320,6 +321,7 @@ pub fn html( theme: &Theme, source: &[u8], config: &HighlightConfiguration, + quiet: bool, print_time: bool, ) -> Result<()> { use std::io::Write; @@ -343,17 +345,19 @@ pub fn html( } })?; - write!(&mut stdout, "\n")?; - for (i, line) in renderer.lines().enumerate() { - write!( - &mut stdout, - "\n", - i + 1, - line - )?; - } + if !quiet { + write!(&mut stdout, "
{}{}
\n")?; + for (i, line) in renderer.lines().enumerate() { + write!( + &mut stdout, + "\n", + i + 1, + line + )?; + } - write!(&mut stdout, "
{}{}
\n")?; + write!(&mut stdout, "\n")?; + } if print_time { eprintln!("Time: {}ms", time.elapsed().as_millis()); diff --git a/cli/src/main.rs b/cli/src/main.rs index 2e55c2fba4..1eaa6a7571 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -7,7 +7,7 @@ use std::{env, fs, u64}; use tree_sitter::Language; use tree_sitter_cli::{ config, error, generate, highlight, loader, logger, parse, query, tags, test, test_highlight, - wasm, web_ui, + util, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -66,7 +66,6 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)) .arg( Arg::with_name("edits") @@ -135,7 +134,7 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("q").short("q")), + .arg(Arg::with_name("quiet").long("quiet").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -225,7 +224,8 @@ fn run() -> error::Result<()> { let edits = matches .values_of("edits") .map_or(Vec::new(), |e| e.collect()); - let allow_cancellation = matches.is_present("allow-cancellation"); + let cancellation_flag = util::cancel_on_stdin(); + let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); @@ -254,7 +254,7 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, - allow_cancellation, + Some(&cancellation_flag), )?; if should_track_stats { @@ -305,12 +305,16 @@ fn run() -> error::Result<()> { loader.find_all_languages(&config.parser_directories)?; let time = matches.is_present("time"); + let quiet = matches.is_present("quiet"); + let html_mode = quiet || 
matches.is_present("html"); let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; - let html_mode = matches.is_present("html"); - if html_mode { + + if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } + let cancellation_flag = util::cancel_on_stdin(); + let mut lang = None; if let Some(scope) = matches.value_of("scope") { lang = loader.language_configuration_for_scope(scope)?; @@ -335,16 +339,30 @@ fn run() -> error::Result<()> { if let Some(highlight_config) = language_config.highlight_config(language)? { let source = fs::read(path)?; if html_mode { - highlight::html(&loader, &config.theme, &source, highlight_config, time)?; + highlight::html( + &loader, + &config.theme, + &source, + highlight_config, + quiet, + time, + )?; } else { - highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?; + highlight::ansi( + &loader, + &config.theme, + &source, + highlight_config, + time, + Some(&cancellation_flag), + )?; } } else { eprintln!("No syntax highlighting config found for path {:?}", path); } } - if html_mode { + if html_mode && !quiet { println!("{}", highlight::HTML_FOOTER); } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 499bef1f33..4d66df1d2b 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,9 +2,9 @@ use super::error::{Error, Result}; use super::util; use std::io::{self, Write}; use std::path::Path; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fmt, fs, thread, usize}; +use std::{fmt, fs, usize}; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] @@ -40,7 +40,7 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, - allow_cancellation: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -51,16 +51,7 @@ 
pub fn parse_file_at_path( // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. - if allow_cancellation { - let flag = Box::new(AtomicUsize::new(0)); - unsafe { parser.set_cancellation_flag(Some(&flag)) }; - thread::spawn(move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - eprintln!("Cancelling"); - flag.store(1, Ordering::Relaxed); - }); - } + unsafe { parser.set_cancellation_flag(cancellation_flag) }; // Set a timeout based on the `--time` flag. parser.set_timeout_micros(timeout); diff --git a/cli/src/util.rs b/cli/src/util.rs index 9f941f62ae..acafa6621c 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -15,14 +15,16 @@ const HTML_HEADER: &[u8] = b"\n Arc { let result = Arc::new(AtomicUsize::new(0)); - thread::spawn({ - let flag = result.clone(); - move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - flag.store(1, Ordering::Relaxed); - } - }); + if atty::is(atty::Stream::Stdin) { + thread::spawn({ + let flag = result.clone(); + move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + flag.store(1, Ordering::Relaxed); + } + }); + } result } #[cfg(windows)] From 533aaa462b301187aeff4fb7ad096e68c73c6545 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 13:20:57 -0700 Subject: [PATCH 221/282] Add heap-profiling script --- script/heap-profile | 34 ++++++++++++++++++++++++++++++++++ test/profile/heap.cc | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100755 script/heap-profile create mode 100644 test/profile/heap.cc diff --git a/script/heap-profile b/script/heap-profile new file mode 100755 index 0000000000..012d86c74b --- /dev/null +++ b/script/heap-profile @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# Usage: +# script/heap-profile +# +# Parse an example source file and record memory usage +# +# Dependencies: +# * `pprof` executable: 
https://github.com/google/pprof +# * `gperftools` package: https://github.com/gperftools/gperftools + +set -e + +GRAMMARS_DIR=$PWD/test/fixtures/grammars + +# Build the library +make + +# Build the heap-profiling harness +clang++ \ + -I lib/include \ + -I $GRAMMARS_DIR \ + -D GRAMMARS_DIR=\"${GRAMMARS_DIR}/\" \ + -l tcmalloc \ + ./libtree-sitter.a \ + test/profile/heap.cc \ + -o target/heap-profile + +# Run the harness with heap profiling enabled. +export HEAPPROFILE=$PWD/profile +target/heap-profile $@ + +# Extract statistics using pprof. +pprof -top -cum profile.0001.heap diff --git a/test/profile/heap.cc b/test/profile/heap.cc new file mode 100644 index 0000000000..6c0027e871 --- /dev/null +++ b/test/profile/heap.cc @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +extern "C" { +#include "javascript/src/parser.c" +#include "javascript/src/scanner.c" +} + +#define LANGUAGE tree_sitter_javascript +#define SOURCE_PATH "javascript/examples/jquery.js" + +int main() { + TSParser *parser = ts_parser_new(); + if (!ts_parser_set_language(parser, LANGUAGE())) { + fprintf(stderr, "Invalid language\n"); + exit(1); + } + + const char *source_path = GRAMMARS_DIR SOURCE_PATH; + + printf("Parsing %s\n", source_path); + + std::ifstream source_file(source_path); + if (!source_file.good()) { + fprintf(stderr, "Invalid source path %s\n", source_path); + exit(1); + } + + std::string source_code( + (std::istreambuf_iterator(source_file)), + std::istreambuf_iterator() + ); + + TSTree *tree = ts_parser_parse_string( + parser, + NULL, + source_code.c_str(), + source_code.size() + ); +} From 9774f760c37e39aa54a58f861c606c150348559c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Oct 2020 18:44:58 -0700 Subject: [PATCH 222/282] Disable clang static analyzer on CI All of its output seems to be false positives. 
--- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ab9a686666..7205ae03f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,6 @@ script: - (eval "$WASM_ENV" && script/generate-fixtures-wasm) # Run the tests - - export TREE_SITTER_STATIC_ANALYSIS=1 - script/test - script/test-wasm - script/benchmark From 2465207fee94bf07b0b5e950c2958d2b558b21dd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 25 Oct 2020 09:13:12 -0700 Subject: [PATCH 223/282] Suppress false non-null error in subtree_clone --- lib/src/subtree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/src/subtree.c b/lib/src/subtree.c index fc1db617d6..e90dc9d7fd 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -257,12 +257,13 @@ Subtree ts_subtree_new_error( // Clone a subtree. MutableSubtree ts_subtree_clone(Subtree self) { size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - Subtree *children = ts_malloc(alloc_size); - memcpy(children, ts_subtree_children(self), alloc_size); - SubtreeHeapData *result = (SubtreeHeapData *)&children[self.ptr->child_count]; + Subtree *new_children = ts_malloc(alloc_size); + Subtree *old_children = ts_subtree_children(self); + memcpy(new_children, old_children, alloc_size); + SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; if (self.ptr->child_count > 0) { for (uint32_t i = 0; i < self.ptr->child_count; i++) { - ts_subtree_retain(children[i]); + ts_subtree_retain(new_children[i]); } } else if (self.ptr->has_external_tokens) { result->external_scanner_state = ts_external_scanner_state_copy( From 3e18e97f7c1e8806bcf471b69b37de65e9a6572e Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 12:58:32 -0400 Subject: [PATCH 224/282] start pulling in the stuff from test_highlight --- cli/src/query.rs | 2 +- cli/src/query/assert.rs | 11 +++++++++-- test/fixtures/queries/python.py | 6 +++--- 3 files changed, 13 
insertions(+), 6 deletions(-) diff --git a/cli/src/query.rs b/cli/src/query.rs index 704a2c5611..de320fba2c 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -93,7 +93,7 @@ pub fn query_files_at_paths( } } if should_test { - assert::assert_expected_captures(results, path)? + assert::assert_expected_captures(results, path, &mut parser, language)? } } diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 96162c5bed..69f0de919e 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,10 +1,11 @@ use crate::error; use crate::error::Result; +use crate::test_highlight::parse_highlight_test; use lazy_static::lazy_static; use regex::Regex; use std::collections::hash_map::HashMap; use std::fs; -use tree_sitter::Point; +use tree_sitter::{Language, Parser, Point}; // TODO: It would be cooler to do this with a comments query rather than with a regex // directly. @@ -48,8 +49,14 @@ impl From> for Assertion { } } -pub fn assert_expected_captures(infos: Vec, path: String) -> Result<()> { +pub fn assert_expected_captures( + infos: Vec, + path: String, + parser: &mut Parser, + language: Language, +) -> Result<()> { let contents = fs::read_to_string(path)?; + let _pairs = parse_highlight_test(parser, language, contents.as_bytes()); let assertions: Vec = METADATA_REGEX .captures_iter(&contents) diff --git a/test/fixtures/queries/python.py b/test/fixtures/queries/python.py index a48ed2de92..01ec9ab068 100644 --- a/test/fixtures/queries/python.py +++ b/test/fixtures/queries/python.py @@ -1,7 +1,7 @@ def foo(): pass -# definition: function: 0, 0 +# <- definition.function def bar(): -# definition: function, 3, 0 +# <- definition.function foo() -# reference: call, 5, 4 + # <- reference.call From 9af9d66e194a6ea809d0646b513015829ecf3343 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 13:13:25 -0400 Subject: [PATCH 225/282] it works --- cli/src/query/assert.rs | 39 +++++++++++---------------------------- 1 file changed, 11 
insertions(+), 28 deletions(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 69f0de919e..5f042f3b26 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -22,29 +22,15 @@ pub struct CaptureInfo { #[derive(Debug, Eq, PartialEq)] struct Assertion { position: Point, - capture_class: String, - capture_type: String, + expected: String, } -impl From> for Assertion { - fn from(re: regex::Captures) -> Assertion { +impl From<&(Point, String)> for Assertion { + fn from(item: &(Point, String)) -> Assertion { + let (pos, info) = item; Assertion { - capture_class: re.get(1).unwrap().as_str().to_string(), - capture_type: re.get(2).unwrap().as_str().to_string(), - position: Point { - row: re - .get(3) - .iter() - .flat_map(|m| m.as_str().parse::()) - .next() - .unwrap(), - column: re - .get(4) - .iter() - .flat_map(|m| m.as_str().parse::()) - .next() - .unwrap(), - }, + position: *pos, + expected: info.to_string(), } } } @@ -56,12 +42,10 @@ pub fn assert_expected_captures( language: Language, ) -> Result<()> { let contents = fs::read_to_string(path)?; - let _pairs = parse_highlight_test(parser, language, contents.as_bytes()); + let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; + println!("{:?}", pairs); - let assertions: Vec = METADATA_REGEX - .captures_iter(&contents) - .map(|c| Assertion::from(c)) - .collect(); + let assertions: Vec = pairs.iter().map(Assertion::from).collect(); let per_position_index: HashMap = assertions.iter().map(|a| (a.position, a)).collect(); @@ -71,11 +55,10 @@ pub fn assert_expected_captures( continue; } let found = per_position_index.get(&info.position).unwrap(); - let joined = format!("{}.{}", found.capture_class, found.capture_type); - if joined != info.name && info.name != "name" { + if found.expected != info.name && info.name != "name" { Err(error::Error::new(format!( "Assertion failed: at {}, found {}, expected {}", - info.position, info.name, joined + info.position, info.name, 
found.expected )))? } } From f364ce2304371cbe89f40fa14e471919dac23015 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 13:22:12 -0400 Subject: [PATCH 226/282] Remove old assertion stuff --- cli/src/query/assert.rs | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 5f042f3b26..352f8de5ce 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -1,40 +1,16 @@ use crate::error; use crate::error::Result; use crate::test_highlight::parse_highlight_test; -use lazy_static::lazy_static; -use regex::Regex; use std::collections::hash_map::HashMap; use std::fs; use tree_sitter::{Language, Parser, Point}; -// TODO: It would be cooler to do this with a comments query rather than with a regex -// directly. -lazy_static! { - static ref METADATA_REGEX: Regex = Regex::new(r#"(\w+): ([^\s,]+), (\d+), (\d+)"#).unwrap(); -} - #[derive(Debug, Eq, PartialEq)] pub struct CaptureInfo { pub name: String, pub position: Point, } -#[derive(Debug, Eq, PartialEq)] -struct Assertion { - position: Point, - expected: String, -} - -impl From<&(Point, String)> for Assertion { - fn from(item: &(Point, String)) -> Assertion { - let (pos, info) = item; - Assertion { - position: *pos, - expected: info.to_string(), - } - } -} - pub fn assert_expected_captures( infos: Vec, path: String, @@ -45,20 +21,17 @@ pub fn assert_expected_captures( let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; println!("{:?}", pairs); - let assertions: Vec = pairs.iter().map(Assertion::from).collect(); - - let per_position_index: HashMap = - assertions.iter().map(|a| (a.position, a)).collect(); + let per_position_index: HashMap = pairs.iter().map(|(a, b)| (*a, b)).collect(); for info in &infos { if !per_position_index.contains_key(&info.position) { continue; } let found = per_position_index.get(&info.position).unwrap(); - if found.expected != info.name && info.name 
!= "name" { + if **found != info.name && info.name != "name" { Err(error::Error::new(format!( "Assertion failed: at {}, found {}, expected {}", - info.position, info.name, found.expected + info.position, info.name, found )))? } } From 1012bea3f4565231058b7c57602150b9cdd63ad3 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 13:35:10 -0400 Subject: [PATCH 227/282] let's start sharing this code --- cli/src/query/assert.rs | 4 ++-- cli/src/test_highlight.rs | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs index 352f8de5ce..1099295928 100644 --- a/cli/src/query/assert.rs +++ b/cli/src/query/assert.rs @@ -19,9 +19,9 @@ pub fn assert_expected_captures( ) -> Result<()> { let contents = fs::read_to_string(path)?; let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; - println!("{:?}", pairs); - let per_position_index: HashMap = pairs.iter().map(|(a, b)| (*a, b)).collect(); + let per_position_index: HashMap = + pairs.iter().map(|a| (a.position, &a.expected)).collect(); for info in &infos { if !per_position_index.contains_key(&info.position) { diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index cf163c0589..45841a7b59 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -12,6 +12,11 @@ lazy_static! { static ref HIGHLIGHT_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); } +pub struct Assertion { + pub position: Point, + pub expected: String, +} + pub struct Failure { row: usize, column: usize, @@ -102,7 +107,11 @@ pub fn test_highlight( // actual highlights. 
let mut i = 0; let mut actual_highlights = Vec::<&String>::new(); - for (position, expected_highlight) in &assertions { + for Assertion { + position, + expected: expected_highlight, + } in &assertions + { let mut passed = false; actual_highlights.clear(); @@ -163,7 +172,7 @@ pub fn parse_highlight_test( parser: &mut Parser, language: Language, source: &[u8], -) -> Result> { +) -> Result> { let mut result = Vec::new(); let mut assertion_ranges = Vec::new(); @@ -213,7 +222,10 @@ pub fn parse_highlight_test( (has_arrow, HIGHLIGHT_NAME_REGEX.find(&text[arrow_end..])) { assertion_ranges.push((node.start_position(), node.end_position())); - result.push((position, mat.as_str().to_string())); + result.push(Assertion { + position: position, + expected: mat.as_str().to_string(), + }); } } } @@ -233,15 +245,17 @@ pub fn parse_highlight_test( // code *above* the assertion. There can be multiple lines of assertion comments, // so the positions may have to be decremented by more than one row. let mut i = 0; - for (position, _) in result.iter_mut() { + for assertion in result.iter_mut() { loop { let on_assertion_line = assertion_ranges[i..] .iter() - .any(|(start, _)| start.row == position.row); + .any(|(start, _)| start.row == assertion.position.row); if on_assertion_line { - position.row -= 1; + assertion.position.row -= 1; } else { - while i < assertion_ranges.len() && assertion_ranges[i].0.row < position.row { + while i < assertion_ranges.len() + && assertion_ranges[i].0.row < assertion.position.row + { i += 1; } break; @@ -250,7 +264,7 @@ pub fn parse_highlight_test( } // The assertions can end up out of order due to the line adjustments. 
- result.sort_unstable_by_key(|a| a.0); + result.sort_unstable_by_key(|a| a.position); Ok(result) } From 6adeb7b40d01fc85f5f14d8fda2339c9e17cc7a4 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 14:27:33 -0400 Subject: [PATCH 228/282] move shared code to query_testing --- cli/src/lib.rs | 1 + cli/src/query.rs | 11 +-- cli/src/query/assert.rs | 39 ---------- cli/src/query_testing.rs | 153 ++++++++++++++++++++++++++++++++++++++ cli/src/test_highlight.rs | 118 +---------------------------- 5 files changed, 160 insertions(+), 162 deletions(-) delete mode 100644 cli/src/query/assert.rs create mode 100644 cli/src/query_testing.rs diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 97c288a17c..e00323b78b 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -6,6 +6,7 @@ pub mod loader; pub mod logger; pub mod parse; pub mod query; +pub mod query_testing; pub mod tags; pub mod test; pub mod test_highlight; diff --git a/cli/src/query.rs b/cli/src/query.rs index de320fba2c..9c524877cf 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -1,13 +1,10 @@ use super::error::{Error, Result}; +use crate::query_testing; use std::fs; use std::io::{self, Write}; use std::path::Path; use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; -mod assert; - -use assert::CaptureInfo; - pub fn query_files_at_paths( language: Language, paths: Vec, @@ -58,7 +55,7 @@ pub fn query_files_at_paths( capture.node.start_position().row, capture.node.utf8_text(&source_code).unwrap_or("") )?; - results.push(CaptureInfo { + results.push(query_testing::CaptureInfo { name: capture_name.to_string(), position: capture.node.start_position(), }); @@ -85,7 +82,7 @@ pub fn query_files_at_paths( capture_name, start, end, )?; } - results.push(CaptureInfo { + results.push(query_testing::CaptureInfo { name: capture_name.to_string(), position: capture.node.start_position(), }); @@ -93,7 +90,7 @@ pub fn query_files_at_paths( } } if should_test { - assert::assert_expected_captures(results, 
path, &mut parser, language)? + query_testing::assert_expected_captures(results, path, &mut parser, language)? } } diff --git a/cli/src/query/assert.rs b/cli/src/query/assert.rs deleted file mode 100644 index 1099295928..0000000000 --- a/cli/src/query/assert.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::error; -use crate::error::Result; -use crate::test_highlight::parse_highlight_test; -use std::collections::hash_map::HashMap; -use std::fs; -use tree_sitter::{Language, Parser, Point}; - -#[derive(Debug, Eq, PartialEq)] -pub struct CaptureInfo { - pub name: String, - pub position: Point, -} - -pub fn assert_expected_captures( - infos: Vec, - path: String, - parser: &mut Parser, - language: Language, -) -> Result<()> { - let contents = fs::read_to_string(path)?; - let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; - - let per_position_index: HashMap = - pairs.iter().map(|a| (a.position, &a.expected)).collect(); - - for info in &infos { - if !per_position_index.contains_key(&info.position) { - continue; - } - let found = per_position_index.get(&info.position).unwrap(); - if **found != info.name && info.name != "name" { - Err(error::Error::new(format!( - "Assertion failed: at {}, found {}, expected {}", - info.position, info.name, found - )))? - } - } - Ok(()) -} diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs new file mode 100644 index 0000000000..b6e2c169ef --- /dev/null +++ b/cli/src/query_testing.rs @@ -0,0 +1,153 @@ +use crate::error; +use crate::error::Result; +use lazy_static::lazy_static; +use regex::Regex; +use std::collections::hash_map::HashMap; +use std::fs; +use tree_sitter::{Language, Parser, Point}; + +lazy_static! 
{ + static ref HIGHLIGHT_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); +} + +#[derive(Debug, Eq, PartialEq)] +pub struct CaptureInfo { + pub name: String, + pub position: Point, +} + +pub struct Assertion { + pub position: Point, + pub expected: String, +} + +/// Parse the given source code, finding all of the comments that contain +/// highlighting assertions. Return a vector of (position, expected highlight name) +/// pairs. +pub fn parse_highlight_test( + parser: &mut Parser, + language: Language, + source: &[u8], +) -> Result> { + let mut result = Vec::new(); + let mut assertion_ranges = Vec::new(); + + // Parse the code. + parser.set_included_ranges(&[]).unwrap(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + // Walk the tree, finding comment nodes that contain assertions. + let mut ascending = false; + let mut cursor = tree.root_node().walk(); + loop { + if ascending { + let node = cursor.node(); + + // Find every comment node. + if node.kind().contains("comment") { + if let Ok(text) = node.utf8_text(source) { + let mut position = node.start_position(); + if position.row == 0 { + continue; + } + + // Find the arrow character ("^" or '<-") in the comment. A left arrow + // refers to the column where the comment node starts. An up arrow refers + // to its own column. + let mut has_left_caret = false; + let mut has_arrow = false; + let mut arrow_end = 0; + for (i, c) in text.char_indices() { + arrow_end = i + 1; + if c == '-' && has_left_caret { + has_arrow = true; + break; + } + if c == '^' { + has_arrow = true; + position.column += i; + break; + } + has_left_caret = c == '<'; + } + + // If the comment node contains an arrow and a highlight name, record the + // highlight name and the position. 
+ if let (true, Some(mat)) = + (has_arrow, HIGHLIGHT_NAME_REGEX.find(&text[arrow_end..])) + { + assertion_ranges.push((node.start_position(), node.end_position())); + result.push(Assertion { + position: position, + expected: mat.as_str().to_string(), + }); + } + } + } + + // Continue walking the tree. + if cursor.goto_next_sibling() { + ascending = false; + } else if !cursor.goto_parent() { + break; + } + } else if !cursor.goto_first_child() { + ascending = true; + } + } + + // Adjust the row number in each assertion's position to refer to the line of + // code *above* the assertion. There can be multiple lines of assertion comments, + // so the positions may have to be decremented by more than one row. + let mut i = 0; + for assertion in result.iter_mut() { + loop { + let on_assertion_line = assertion_ranges[i..] + .iter() + .any(|(start, _)| start.row == assertion.position.row); + if on_assertion_line { + assertion.position.row -= 1; + } else { + while i < assertion_ranges.len() + && assertion_ranges[i].0.row < assertion.position.row + { + i += 1; + } + break; + } + } + } + + // The assertions can end up out of order due to the line adjustments. + result.sort_unstable_by_key(|a| a.position); + + Ok(result) +} + +pub fn assert_expected_captures( + infos: Vec, + path: String, + parser: &mut Parser, + language: Language, +) -> Result<()> { + let contents = fs::read_to_string(path)?; + let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; + + let per_position_index: HashMap = + pairs.iter().map(|a| (a.position, &a.expected)).collect(); + + for info in &infos { + if !per_position_index.contains_key(&info.position) { + continue; + } + let found = per_position_index.get(&info.position).unwrap(); + if **found != info.name && info.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + info.position, found, info.name + )))? 
+ } + } + Ok(()) +} diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 45841a7b59..da67f7539c 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -1,22 +1,12 @@ use super::error::Result; use crate::loader::Loader; +use crate::query_testing::{parse_highlight_test, Assertion}; use ansi_term::Colour; -use lazy_static::lazy_static; -use regex::Regex; use std::fs; use std::path::Path; -use tree_sitter::{Language, Parser, Point}; +use tree_sitter::Point; use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; -lazy_static! { - static ref HIGHLIGHT_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); -} - -pub struct Assertion { - pub position: Point, - pub expected: String, -} - pub struct Failure { row: usize, column: usize, @@ -165,110 +155,6 @@ pub fn test_highlight( Ok(assertions.len()) } -/// Parse the given source code, finding all of the comments that contain -/// highlighting assertions. Return a vector of (position, expected highlight name) -/// pairs. -pub fn parse_highlight_test( - parser: &mut Parser, - language: Language, - source: &[u8], -) -> Result> { - let mut result = Vec::new(); - let mut assertion_ranges = Vec::new(); - - // Parse the code. - parser.set_included_ranges(&[]).unwrap(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - - // Walk the tree, finding comment nodes that contain assertions. - let mut ascending = false; - let mut cursor = tree.root_node().walk(); - loop { - if ascending { - let node = cursor.node(); - - // Find every comment node. - if node.kind().contains("comment") { - if let Ok(text) = node.utf8_text(source) { - let mut position = node.start_position(); - if position.row == 0 { - continue; - } - - // Find the arrow character ("^" or '<-") in the comment. A left arrow - // refers to the column where the comment node starts. An up arrow refers - // to its own column. 
- let mut has_left_caret = false; - let mut has_arrow = false; - let mut arrow_end = 0; - for (i, c) in text.char_indices() { - arrow_end = i + 1; - if c == '-' && has_left_caret { - has_arrow = true; - break; - } - if c == '^' { - has_arrow = true; - position.column += i; - break; - } - has_left_caret = c == '<'; - } - - // If the comment node contains an arrow and a highlight name, record the - // highlight name and the position. - if let (true, Some(mat)) = - (has_arrow, HIGHLIGHT_NAME_REGEX.find(&text[arrow_end..])) - { - assertion_ranges.push((node.start_position(), node.end_position())); - result.push(Assertion { - position: position, - expected: mat.as_str().to_string(), - }); - } - } - } - - // Continue walking the tree. - if cursor.goto_next_sibling() { - ascending = false; - } else if !cursor.goto_parent() { - break; - } - } else if !cursor.goto_first_child() { - ascending = true; - } - } - - // Adjust the row number in each assertion's position to refer to the line of - // code *above* the assertion. There can be multiple lines of assertion comments, - // so the positions may have to be decremented by more than one row. - let mut i = 0; - for assertion in result.iter_mut() { - loop { - let on_assertion_line = assertion_ranges[i..] - .iter() - .any(|(start, _)| start.row == assertion.position.row); - if on_assertion_line { - assertion.position.row -= 1; - } else { - while i < assertion_ranges.len() - && assertion_ranges[i].0.row < assertion.position.row - { - i += 1; - } - break; - } - } - } - - // The assertions can end up out of order due to the line adjustments. 
- result.sort_unstable_by_key(|a| a.position); - - Ok(result) -} - pub fn get_highlight_positions( loader: &Loader, highlighter: &mut Highlighter, From 1aee60a7c074f6482695a2babf4519bc1064d640 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 26 Oct 2020 14:35:18 -0400 Subject: [PATCH 229/282] propitiate the tests --- cli/src/query_testing.rs | 5 +++-- cli/src/test_highlight.rs | 5 +++-- cli/src/tests/test_highlight_test.rs | 21 ++++++++++++++++----- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index b6e2c169ef..9618d1f6a6 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -16,6 +16,7 @@ pub struct CaptureInfo { pub position: Point, } +#[derive(Debug, PartialEq, Eq)] pub struct Assertion { pub position: Point, pub expected: String, @@ -24,7 +25,7 @@ pub struct Assertion { /// Parse the given source code, finding all of the comments that contain /// highlighting assertions. Return a vector of (position, expected highlight name) /// pairs. 
-pub fn parse_highlight_test( +pub fn parse_position_comments( parser: &mut Parser, language: Language, source: &[u8], @@ -132,7 +133,7 @@ pub fn assert_expected_captures( language: Language, ) -> Result<()> { let contents = fs::read_to_string(path)?; - let pairs = parse_highlight_test(parser, language, contents.as_bytes())?; + let pairs = parse_position_comments(parser, language, contents.as_bytes())?; let per_position_index: HashMap = pairs.iter().map(|a| (a.position, &a.expected)).collect(); diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index da67f7539c..55a709aa4c 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -1,6 +1,6 @@ use super::error::Result; use crate::loader::Loader; -use crate::query_testing::{parse_highlight_test, Assertion}; +use crate::query_testing::{parse_position_comments, Assertion}; use ansi_term::Colour; use std::fs; use std::path::Path; @@ -91,7 +91,8 @@ pub fn test_highlight( // Highlight the file, and parse out all of the highlighting assertions. let highlight_names = loader.highlight_names(); let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; - let assertions = parse_highlight_test(highlighter.parser(), highlight_config.language, source)?; + let assertions = + parse_position_comments(highlighter.parser(), highlight_config.language, source)?; // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. 
diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs index 6a857dd94d..669208237c 100644 --- a/cli/src/tests/test_highlight_test.rs +++ b/cli/src/tests/test_highlight_test.rs @@ -1,5 +1,6 @@ use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; -use crate::test_highlight::{get_highlight_positions, parse_highlight_test}; +use crate::query_testing::{parse_position_comments, Assertion}; +use crate::test_highlight::get_highlight_positions; use tree_sitter::{Parser, Point}; use tree_sitter_highlight::{Highlight, Highlighter}; @@ -25,13 +26,23 @@ fn test_highlight_test_with_basic_test() { ] .join("\n"); - let assertions = parse_highlight_test(&mut Parser::new(), language, source.as_bytes()).unwrap(); + let assertions = + parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); assert_eq!( assertions, &[ - (Point::new(0, 5), "function".to_string()), - (Point::new(0, 11), "keyword".to_string()), - (Point::new(3, 9), "variable.parameter".to_string()), + Assertion { + position: Point::new(0, 5), + expected: "function".to_string() + }, + Assertion { + position: Point::new(0, 11), + expected: "keyword".to_string() + }, + Assertion { + position: Point::new(3, 9), + expected: "variable.parameter".to_string() + }, ] ); From 0bd223f032e9bfc92dde4fc7bb83164b16bb039c Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 27 Oct 2020 13:11:57 -0400 Subject: [PATCH 230/282] Better naming for this regex. --- cli/src/query_testing.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 9618d1f6a6..8c6af706b2 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -7,7 +7,7 @@ use std::fs; use tree_sitter::{Language, Parser, Point}; lazy_static! 
{ - static ref HIGHLIGHT_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); + static ref PROPERTY_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); } #[derive(Debug, Eq, PartialEq)] @@ -76,7 +76,7 @@ pub fn parse_position_comments( // If the comment node contains an arrow and a highlight name, record the // highlight name and the position. if let (true, Some(mat)) = - (has_arrow, HIGHLIGHT_NAME_REGEX.find(&text[arrow_end..])) + (has_arrow, PROPERTY_NAME_REGEX.find(&text[arrow_end..])) { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { From a2d760e42694b9077e61bc0d5f48dfd5a4325baf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 27 Oct 2020 15:46:09 -0700 Subject: [PATCH 231/282] Ensure nodes are aliased consistently within syntax error nodes Co-Authored-By: Rick Winfrey --- .../extract_default_aliases.rs | 293 ++++++++++++++++++ .../prepare_grammar/extract_simple_aliases.rs | 223 ------------- cli/src/generate/prepare_grammar/mod.rs | 8 +- cli/src/generate/render.rs | 18 +- cli/src/tests/query_test.rs | 24 ++ 5 files changed, 330 insertions(+), 236 deletions(-) create mode 100644 cli/src/generate/prepare_grammar/extract_default_aliases.rs delete mode 100644 cli/src/generate/prepare_grammar/extract_simple_aliases.rs diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs new file mode 100644 index 0000000000..3e08e3adbe --- /dev/null +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -0,0 +1,293 @@ +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; + +#[derive(Clone, Default)] +struct SymbolStatus { + aliases: Vec<(Alias, usize)>, + appears_unaliased: bool, +} + +// Update the grammar by finding symbols that always are aliased, and for each such symbol, +// promoting one of its aliases to a "default alias", which is applied 
globally instead +// of in a context-specific way. +// +// This has two benefits: +// * It reduces the overhead of storing production-specific alias info in the parse table. +// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation +// ensures that the children of an `ERROR` node have symbols that are consistent with the +// way that they would appear in a valid syntax tree. +pub(super) fn extract_default_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> AliasMap { + let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + + // For each grammar symbol, find all of the aliases under which the symbol appears, + // and determine whether or not the symbol ever appears *unaliased*. + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // Default aliases don't work for inlined variables. 
+ if syntax_grammar.variables_to_inline.contains(&step.symbol) { + continue; + } + + if let Some(alias) = &step.alias { + if let Some(count_for_alias) = status + .aliases + .iter_mut() + .find_map(|(a, count)| if a == alias { Some(count) } else { None }) + { + *count_for_alias += 1; + } else { + status.aliases.push((alias.clone(), 1)); + } + } else { + status.appears_unaliased = true; + } + } + } + } + + let symbols_with_statuses = (terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::terminal(i), status))) + .chain( + non_terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::non_terminal(i), status)), + ) + .chain( + external_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::external(i), status)), + ); + + // For each symbol that always appears aliased, find the alias the occurs most often, + // and designate that alias as the symbol's "default alias". Store all of these + // default aliases in a map that will be returned. + let mut result = AliasMap::new(); + for (symbol, status) in symbols_with_statuses { + if status.appears_unaliased { + status.aliases.clear(); + } else { + if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); + } + } + } + + // Wherever a symbol is aliased as its default alias, remove the usage of the alias, + // because it will now be redundant. 
+ let mut alias_positions_to_clear = Vec::new(); + for variable in syntax_grammar.variables.iter_mut() { + alias_positions_to_clear.clear(); + + for (i, production) in variable.productions.iter().enumerate() { + for (j, step) in production.steps.iter().enumerate() { + let status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // If this step is aliased as the symbol's default alias, then remove that alias. + if step.alias.is_some() + && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + { + let mut other_productions_must_use_this_alias_at_this_index = false; + for (other_i, other_production) in variable.productions.iter().enumerate() { + if other_i != i + && other_production.steps.len() > j + && other_production.steps[j].alias == step.alias + && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() + { + other_productions_must_use_this_alias_at_this_index = true; + break; + } + } + + if !other_productions_must_use_this_alias_at_this_index { + alias_positions_to_clear.push((i, j)); + } + } + } + } + + for (production_index, step_index) in &alias_positions_to_clear { + variable.productions[*production_index].steps[*step_index].alias = None; + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + use crate::generate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + 
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + }], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased within rule `v1` above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently here than in `v1`. The alias from + // `v1` should be promoted to the default alias, because `v1` appears + // first in the grammar. + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + // Token 3 is also aliased differently here than in `v1`. In this case, + // this alias should be promoted to the default alias, because it is + // used a greater number of times (twice). + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ], + }], + }, + ], + extra_symbols: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), + variables: vec![ + LexicalVariable { + name: "t0".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + ], + }; + + let default_aliases = extract_default_aliases(&mut 
syntax_grammar, &lexical_grammar); + assert_eq!(default_aliases.len(), 3); + + assert_eq!( + default_aliases.get(&Symbol::terminal(0)), + Some(&Alias { + value: "a1".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(2)), + Some(&Alias { + value: "a3".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(3)), + Some(&Alias { + value: "a6".to_string(), + is_named: true, + }) + ); + assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); + + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + ProductionStep::new(Symbol::terminal(3)), + ProductionStep::new(Symbol::terminal(3)), + ], + },], + }, + ] + ); + } +} diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs deleted file mode 100644 index 6da009d56d..0000000000 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; - -#[derive(Clone, Default)] -struct SymbolStatus { - alias: Option, - conflicting: bool, -} - -pub(super) fn extract_simple_aliases( - syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: 
&LexicalGrammar, -) -> AliasMap { - // Determine which symbols in the grammars are *always* aliased to a single name. - let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = - vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = - vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &mut external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &mut non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &mut terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if step.alias.is_none() { - status.alias = None; - status.conflicting = true; - } - - if !status.conflicting { - if status.alias.is_none() { - status.alias = step.alias.clone(); - } else if status.alias != step.alias { - status.alias = None; - status.conflicting = true; - } - } - } - } - } - - // Remove the aliases for those symbols. - for variable in syntax_grammar.variables.iter_mut() { - for production in variable.productions.iter_mut() { - for step in production.steps.iter_mut() { - let status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if status.alias.is_some() { - step.alias = None; - } - } - } - } - - // Populate a map of the symbols to their aliases. 
- let mut result = AliasMap::new(); - for (i, status) in terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::terminal(i), alias); - } - } - for (i, status) in non_terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::non_terminal(i), alias); - } - } - for (i, status) in external_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::external(i), alias); - } - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - use crate::generate::nfa::Nfa; - - #[test] - fn test_extract_simple_aliases() { - let mut syntax_grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }], - }, - ], - extra_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let lexical_grammar = LexicalGrammar { - nfa: Nfa::new(), - variables: vec![ - LexicalVariable { - name: "t1".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t2".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t3".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - ], - }; - - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - assert_eq!(simple_aliases.len(), 1); - assert_eq!( - simple_aliases[&Symbol::terminal(0)], - Alias { - value: "a1".to_string(), - is_named: true, - } - ); - - assert_eq!( - syntax_grammar.variables, - vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // 'Simple' alias removed - ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - },], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(1)), - ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - },], - }, - ] - ); - } -} diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 029483d37e..8b094c562d 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ 
b/cli/src/generate/prepare_grammar/mod.rs @@ -1,6 +1,6 @@ mod expand_repeats; mod expand_tokens; -mod extract_simple_aliases; +mod extract_default_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; @@ -8,7 +8,7 @@ mod process_inlines; use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; -use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_default_aliases::extract_default_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; @@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar( let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); let inlines = process_inlines(&syntax_grammar); - Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) + Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index f7f788d07e..e1e75ee133 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -65,7 +65,7 @@ struct Generator { keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, @@ -198,10 +198,10 @@ impl Generator { // public-facing symbol. If one of the symbols is not aliased, choose that one // to be the public-facing symbol. Otherwise, pick the symbol with the lowest // numeric value. 
- if let Some(alias) = self.simple_aliases.get(symbol) { + if let Some(alias) = self.default_aliases.get(symbol) { let kind = alias.kind(); for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.simple_aliases.get(other_symbol) { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { if other_symbol < mapping && other_alias == alias { mapping = other_symbol; } @@ -361,7 +361,7 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( - self.simple_aliases + self.default_aliases .get(symbol) .map(|alias| alias.value.as_str()) .unwrap_or(self.metadata_for_symbol(*symbol).0), @@ -444,7 +444,7 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", is_named); } else { @@ -525,7 +525,7 @@ impl Generator { for step in &production.steps { if let Some(alias) = &step.alias { if step.symbol.is_non_terminal() - && !self.simple_aliases.contains_key(&step.symbol) + && Some(alias) != self.default_aliases.get(&step.symbol) { if self.symbol_ids.contains_key(&step.symbol) { let alias_ids = @@ -1545,7 +1545,7 @@ impl Generator { /// for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `simple_aliases` - A map describing the global rename rules that should apply. +/// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. 
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse @@ -1558,7 +1558,7 @@ pub(crate) fn render_c_code( keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, next_abi: bool, ) -> String { Generator { @@ -1572,7 +1572,7 @@ pub(crate) fn render_c_code( keyword_capture_token, syntax_grammar, lexical_grammar, - simple_aliases, + default_aliases, symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 1f7ddaffd6..067bb6f901 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -367,6 +367,30 @@ fn test_query_errors_on_impossible_patterns() { }); } +#[test] +fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { + allocations::record(|| { + let ruby = get_language("ruby"); + + Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + + assert_eq!( + Query::new(ruby, "(destructured_parameter (string))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 24, + column: 24, + message: [ + "(destructured_parameter (string))", // + " ^", + ] + .join("\n") + }) + ); + }); +} + #[test] fn test_query_matches_with_simple_pattern() { allocations::record(|| { From 071f4e40f19b23e83e7b16ca9272d5a3ca1806df Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 12:34:11 -0700 Subject: [PATCH 232/282] Fix generate error when there are aliases in unused rules --- cli/src/generate/render.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index e1e75ee133..04f9e47b79 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -519,7 +519,7 @@ impl Generator { } fn add_non_terminal_alias_map(&mut self) { - let mut aliases_by_symbol = HashMap::new(); + let 
mut alias_ids_by_symbol = HashMap::new(); for variable in &self.syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { @@ -528,10 +528,13 @@ impl Generator { && Some(alias) != self.default_aliases.get(&step.symbol) { if self.symbol_ids.contains_key(&step.symbol) { - let alias_ids = - aliases_by_symbol.entry(step.symbol).or_insert(Vec::new()); - if let Err(i) = alias_ids.binary_search(&alias) { - alias_ids.insert(i, alias); + if let Some(alias_id) = self.alias_ids.get(&alias) { + let alias_ids = alias_ids_by_symbol + .entry(step.symbol) + .or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); + } } } } @@ -540,19 +543,19 @@ impl Generator { } } - let mut aliases_by_symbol = aliases_by_symbol.iter().collect::>(); - aliases_by_symbol.sort_unstable_by_key(|e| e.0); + let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); + alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); indent!(self); - for (symbol, aliases) in aliases_by_symbol { + for (symbol, alias_ids) in alias_ids_by_symbol { let symbol_id = &self.symbol_ids[symbol]; let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; - add_line!(self, "{}, {},", symbol_id, 1 + aliases.len()); + add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); indent!(self); add_line!(self, "{},", public_symbol_id); - for alias in aliases { - add_line!(self, "{},", &self.alias_ids[&alias]); + for alias_id in alias_ids { + add_line!(self, "{},", alias_id); } dedent!(self); } From c2c63baf5bdba4768bed97427f5ac94f54df70df Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 13:55:06 -0700 Subject: [PATCH 233/282] query: Fix escape sequence parsing in anonymous node patterns Fixes #776 Fixes #760 --- cli/src/tests/query_test.rs | 5 +- lib/src/array.h | 18 +++- lib/src/query.c | 178 ++++++++++++++---------------------- 3 
files changed, 86 insertions(+), 115 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 067bb6f901..6bf6cbb001 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1475,6 +1475,7 @@ fn test_query_matches_with_anonymous_tokens() { r#" ";" @punctuation "&&" @operator + "\"" @quote "#, ) .unwrap(); @@ -1482,9 +1483,11 @@ fn test_query_matches_with_anonymous_tokens() { assert_query_matches( language, &query, - "foo(a && b);", + r#"foo(a && "b");"#, &[ (1, vec![("operator", "&&")]), + (2, vec![("quote", "\"")]), + (2, vec![("quote", "\"")]), (0, vec![("punctuation", ";")]), ], ); diff --git a/lib/src/array.h b/lib/src/array.h index 13117194d9..5ff5580a22 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -52,14 +52,24 @@ extern "C" { (self)->size += (count)) #define array_push_all(self, other) \ - array_splice((self), (self)->size, 0, (other)->size, (other)->contents) + array_extend((self), (other)->size, (other)->contents) + +// Append `count` elements to the end of the array, reading their values from the +// `contents` pointer. +#define array_extend(self, count, contents) \ + array__splice( \ + (VoidArray *)(self), array__elem_size(self), (self)->size, \ + 0, count, contents \ + ) // Remove `old_count` elements from the array starting at the given `index`. At // the same index, insert `new_count` new elements, reading their values from the // `new_contents` pointer. -#define array_splice(self, index, old_count, new_count, new_contents) \ - array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \ - new_count, new_contents) +#define array_splice(self, index, old_count, new_count, new_contents) \ + array__splice( \ + (VoidArray *)(self), array__elem_size(self), index, \ + old_count, new_count, new_contents \ + ) // Insert one `element` into the array at the given `index`. 
#define array_insert(self, index, element) \ diff --git a/lib/src/query.c b/lib/src/query.c index ae476c2ab5..bf0598ce55 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -214,6 +214,7 @@ struct TSQuery { Array(TSQueryPredicateStep) predicate_steps; Array(QueryPattern) patterns; Array(StepOffset) step_offsets; + Array(char) string_buffer; const TSLanguage *language; uint16_t wildcard_root_pattern_count; TSSymbol *symbol_map; @@ -439,67 +440,6 @@ static uint16_t symbol_table_insert_name( return self->slices.size - 1; } -static uint16_t symbol_table_insert_name_with_escapes( - SymbolTable *self, - const char *escaped_name, - uint32_t escaped_length -) { - Slice slice = { - .offset = self->characters.size, - .length = 0, - }; - array_grow_by(&self->characters, escaped_length + 1); - - // Copy the contents of the literal into the characters buffer, processing escape - // sequences like \n and \". This needs to be done before checking if the literal - // is already present, in order to do the string comparison. - bool is_escaped = false; - for (unsigned i = 0; i < escaped_length; i++) { - const char *src = &escaped_name[i]; - char *dest = &self->characters.contents[slice.offset + slice.length]; - if (is_escaped) { - switch (*src) { - case 'n': - *dest = '\n'; - break; - case 'r': - *dest = '\r'; - break; - case 't': - *dest = '\t'; - break; - case '0': - *dest = '\0'; - break; - default: - *dest = *src; - break; - } - is_escaped = false; - slice.length++; - } else { - if (*src == '\\') { - is_escaped = true; - } else { - *dest = *src; - slice.length++; - } - } - } - - // If the string is already present, remove the redundant content from the characters - // buffer and return the existing id. 
- int id = symbol_table_id_for_name(self, &self->characters.contents[slice.offset], slice.length); - if (id >= 0) { - self->characters.size -= (escaped_length + 1); - return id; - } - - self->characters.contents[slice.offset + slice.length] = 0; - array_push(&self->slices, slice); - return self->slices.size - 1; -} - /************ * QueryStep ************/ @@ -1393,6 +1333,59 @@ static void ts_query__finalize_steps(TSQuery *self) { } } +static TSQueryError ts_query__parse_string_literal( + TSQuery *self, + Stream *stream +) { + const char *string_start = stream->input; + if (stream->next != '"') return TSQueryErrorSyntax; + stream_advance(stream); + const char *prev_position = stream->input; + + bool is_escaped = false; + array_clear(&self->string_buffer); + for (;;) { + if (is_escaped) { + is_escaped = false; + switch (stream->next) { + case 'n': + array_push(&self->string_buffer, '\n'); + break; + case 'r': + array_push(&self->string_buffer, '\r'); + break; + case 't': + array_push(&self->string_buffer, '\t'); + break; + case '0': + array_push(&self->string_buffer, '\0'); + break; + default: + array_extend(&self->string_buffer, stream->next_size, stream->input); + break; + } + prev_position = stream->input + stream->next_size; + } else { + if (stream->next == '\\') { + array_extend(&self->string_buffer, (stream->input - prev_position), prev_position); + prev_position = stream->input + 1; + is_escaped = true; + } else if (stream->next == '"') { + array_extend(&self->string_buffer, (stream->input - prev_position), prev_position); + stream_advance(stream); + return TSQueryErrorNone; + } else if (stream->next == '\n') { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } + if (!stream_advance(stream)) { + stream_reset(stream, string_start); + return TSQueryErrorSyntax; + } + } +} + // Parse a single predicate associated with a pattern, adding it to the // query's internal `predicate_steps` array. 
Predicates are arbitrary // S-expressions associated with a pattern which are meant to be handled at @@ -1458,44 +1451,17 @@ static TSQueryError ts_query__parse_predicate( // Parse a string literal else if (stream->next == '"') { - stream_advance(stream); - - // Parse the string content - bool is_escaped = false; - const char *string_content = stream->input; - for (;;) { - if (is_escaped) { - is_escaped = false; - } else { - if (stream->next == '\\') { - is_escaped = true; - } else if (stream->next == '"') { - break; - } else if (stream->next == '\n') { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - if (!stream_advance(stream)) { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - uint32_t length = stream->input - string_content; - - // Add a step for the node - uint16_t id = symbol_table_insert_name_with_escapes( + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; + uint16_t id = symbol_table_insert_name( &self->predicate_values, - string_content, - length + self->string_buffer.contents, + self->string_buffer.size ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, .value_id = id, })); - - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); } // Parse a bare symbol @@ -1761,33 +1727,22 @@ static TSQueryError ts_query__parse_pattern( // Parse a double-quoted anonymous leaf node expression else if (stream->next == '"') { - stream_advance(stream); - - // Parse the string content - const char *string_content = stream->input; - while (stream->next != '"') { - if (!stream_advance(stream)) { - stream_reset(stream, string_content - 1); - return TSQueryErrorSyntax; - } - } - uint32_t length = stream->input - string_content; + const char *string_start = stream->input; + TSQueryError e = ts_query__parse_string_literal(self, stream); + if (e) return e; // Add a step for the node TSSymbol symbol = 
ts_language_symbol_for_name( self->language, - string_content, - length, + self->string_buffer.contents, + self->string_buffer.size, false ); if (!symbol) { - stream_reset(stream, string_content); + stream_reset(stream, string_start + 1); return TSQueryErrorNodeType; } array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); - - if (stream->next != '"') return TSQueryErrorSyntax; - stream_advance(stream); } // Parse a field-prefixed pattern @@ -1977,6 +1932,7 @@ TSQuery *ts_query_new( .predicate_steps = array_new(), .patterns = array_new(), .step_offsets = array_new(), + .string_buffer = array_new(), .symbol_map = symbol_map, .wildcard_root_pattern_count = 0, .language = language, @@ -2056,6 +2012,7 @@ TSQuery *ts_query_new( } ts_query__finalize_steps(self); + array_delete(&self->string_buffer); return self; } @@ -2066,6 +2023,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->predicate_steps); array_delete(&self->patterns); array_delete(&self->step_offsets); + array_delete(&self->string_buffer); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); ts_free(self->symbol_map); From 199273419562c66d7c1225213a55a186394422c2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Oct 2020 14:12:56 -0700 Subject: [PATCH 234/282] 0.17.2 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea918eb687..d052511d76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -832,7 +832,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.1" +version = "0.17.2" dependencies = [ "ansi_term", "atty", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 21a8fa0fc4..a81ce16c89 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.17.1" +version = "0.17.2" authors = 
["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index 42f75c9888..f327698cbf 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.1", + "version": "0.17.2", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 3859e52198468c6328cb7508f747f51b4aef13be Mon Sep 17 00:00:00 2001 From: ikrima Date: Fri, 16 Oct 2020 12:42:26 -0700 Subject: [PATCH 235/282] add custom allocation override --- lib/src/alloc.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 0e0927a928..52e5ad3dae 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -42,7 +42,20 @@ static inline bool ts_toggle_allocation_recording(bool value) { return false; } -static inline void *ts_malloc(size_t size) { +#ifndef ts_malloc +#define ts_malloc(_sz) ts_malloc_dflt(_sz) +#endif +#ifndef ts_calloc +#define ts_calloc(_cnt,_sz) ts_calloc_dflt(_cnt,_sz) +#endif +#ifndef ts_realloc +#define ts_realloc(_ptr,_sz) ts_realloc_dflt(_ptr,_sz) +#endif +#ifndef ts_free +#define ts_free(_ptr) ts_free_dflt(_ptr) +#endif + +static inline void *ts_malloc_dflt(size_t size) { void *result = malloc(size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); @@ -51,7 +64,7 @@ static inline void *ts_malloc(size_t size) { return result; } -static inline void *ts_calloc(size_t count, size_t size) { +static inline void *ts_calloc_dflt(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); @@ -60,7 +73,7 @@ static inline void *ts_calloc(size_t count, size_t size) { return result; } -static inline void *ts_realloc(void *buffer, size_t size) { +static inline void *ts_realloc_dflt(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && 
!result) { fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); @@ -69,7 +82,7 @@ static inline void *ts_realloc(void *buffer, size_t size) { return result; } -static inline void ts_free(void *buffer) { +static inline void ts_free_dflt(void *buffer) { free(buffer); } From 336517fdc92fbc8ffcba199e3a4cd55e1c516181 Mon Sep 17 00:00:00 2001 From: ikrima Date: Fri, 16 Oct 2020 15:18:54 -0700 Subject: [PATCH 236/282] address CR comments - replace _dflt with _default - allow override in TREE_SITTER_TEST path --- lib/src/alloc.h | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index 52e5ad3dae..c6a3331b86 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -9,6 +9,21 @@ extern "C" { #include #include +// Allow clients to override allocation functions + +#ifndef ts_malloc +#define ts_malloc(size) ts_malloc_default(size) +#endif +#ifndef ts_calloc +#define ts_calloc(count,size) ts_calloc_default(count,size) +#endif +#ifndef ts_realloc +#define ts_realloc(buffer,size) ts_realloc_default(buffer,size) +#endif +#ifndef ts_free +#define ts_free(buffer) ts_free_default(buffer) +#endif + #if defined(TREE_SITTER_TEST) void *ts_record_malloc(size_t); @@ -17,19 +32,19 @@ void *ts_record_realloc(void *, size_t); void ts_record_free(void *); bool ts_toggle_allocation_recording(bool); -static inline void *ts_malloc(size_t size) { +static inline void *ts_malloc_default(size_t size) { return ts_record_malloc(size); } -static inline void *ts_calloc(size_t count, size_t size) { +static inline void *ts_calloc_default(size_t count, size_t size) { return ts_record_calloc(count, size); } -static inline void *ts_realloc(void *buffer, size_t size) { +static inline void *ts_realloc_default(void *buffer, size_t size) { return ts_record_realloc(buffer, size); } -static inline void ts_free(void *buffer) { +static inline void ts_free_default(void *buffer) { ts_record_free(buffer); } @@ 
-42,20 +57,8 @@ static inline bool ts_toggle_allocation_recording(bool value) { return false; } -#ifndef ts_malloc -#define ts_malloc(_sz) ts_malloc_dflt(_sz) -#endif -#ifndef ts_calloc -#define ts_calloc(_cnt,_sz) ts_calloc_dflt(_cnt,_sz) -#endif -#ifndef ts_realloc -#define ts_realloc(_ptr,_sz) ts_realloc_dflt(_ptr,_sz) -#endif -#ifndef ts_free -#define ts_free(_ptr) ts_free_dflt(_ptr) -#endif -static inline void *ts_malloc_dflt(size_t size) { +static inline void *ts_malloc_default(size_t size) { void *result = malloc(size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); @@ -64,7 +67,7 @@ static inline void *ts_malloc_dflt(size_t size) { return result; } -static inline void *ts_calloc_dflt(size_t count, size_t size) { +static inline void *ts_calloc_default(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); @@ -73,7 +76,7 @@ static inline void *ts_calloc_dflt(size_t count, size_t size) { return result; } -static inline void *ts_realloc_dflt(void *buffer, size_t size) { +static inline void *ts_realloc_default(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); @@ -82,7 +85,7 @@ static inline void *ts_realloc_dflt(void *buffer, size_t size) { return result; } -static inline void ts_free_dflt(void *buffer) { +static inline void ts_free_default(void *buffer) { free(buffer); } From 23530ca599758a4d1d4c1393238b74830256e2db Mon Sep 17 00:00:00 2001 From: ikrima Date: Thu, 29 Oct 2020 09:23:58 -0700 Subject: [PATCH 237/282] CR fixes: don't allow override of allocfn during testing --- lib/src/alloc.h | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index c6a3331b86..cbedb71b8b 100644 --- 
a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -9,21 +9,6 @@ extern "C" { #include #include -// Allow clients to override allocation functions - -#ifndef ts_malloc -#define ts_malloc(size) ts_malloc_default(size) -#endif -#ifndef ts_calloc -#define ts_calloc(count,size) ts_calloc_default(count,size) -#endif -#ifndef ts_realloc -#define ts_realloc(buffer,size) ts_realloc_default(buffer,size) -#endif -#ifndef ts_free -#define ts_free(buffer) ts_free_default(buffer) -#endif - #if defined(TREE_SITTER_TEST) void *ts_record_malloc(size_t); @@ -32,24 +17,27 @@ void *ts_record_realloc(void *, size_t); void ts_record_free(void *); bool ts_toggle_allocation_recording(bool); -static inline void *ts_malloc_default(size_t size) { - return ts_record_malloc(size); -} - -static inline void *ts_calloc_default(size_t count, size_t size) { - return ts_record_calloc(count, size); -} - -static inline void *ts_realloc_default(void *buffer, size_t size) { - return ts_record_realloc(buffer, size); -} - -static inline void ts_free_default(void *buffer) { - ts_record_free(buffer); -} +#define ts_malloc ts_record_malloc +#define ts_calloc ts_record_calloc +#define ts_realloc ts_record_realloc +#define ts_free ts_record_free #else +// Allow clients to override allocation functions +#ifndef ts_malloc +#define ts_malloc ts_malloc_default +#endif +#ifndef ts_calloc +#define ts_calloc ts_calloc_default +#endif +#ifndef ts_realloc +#define ts_realloc ts_realloc_default +#endif +#ifndef ts_free +#define ts_free ts_free_default +#endif + #include static inline bool ts_toggle_allocation_recording(bool value) { From a99676282f1f18c8187bb02171ad1f261ea3c9ca Mon Sep 17 00:00:00 2001 From: ikrima Date: Thu, 29 Oct 2020 09:36:44 -0700 Subject: [PATCH 238/282] noop: touch file to retrigger github checks --- lib/src/alloc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/src/alloc.h b/lib/src/alloc.h index cbedb71b8b..6e22a0abcb 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -25,6 +25,7 @@ bool 
ts_toggle_allocation_recording(bool); #else // Allow clients to override allocation functions + #ifndef ts_malloc #define ts_malloc ts_malloc_default #endif From f07dda692e3a6f4f2229c3a064fa19b8be7bc225 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Thu, 29 Oct 2020 18:05:24 +0100 Subject: [PATCH 239/282] Ensure "extras" symbols are included in the node-types.json file The symbols marked as "extras" are the start symbols of secondary languages. These should be included in the aliases map just as done for start symbol of the main language to ensure their node type and field information is included in the node-types.json file. --- cli/src/generate/node_types.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 7962c7f33b..a575d19743 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -424,6 +424,14 @@ pub(crate) fn generate_node_types_json( aliases }); } + for extra_symbol in &syntax_grammar.extra_symbols { + if !simple_aliases.contains_key(extra_symbol) { + aliases_by_symbol + .entry(*extra_symbol) + .or_insert(HashSet::new()) + .insert(None); + } + } for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { From bcd48e3b9402326c4a34caed68cf9193edb91f37 Mon Sep 17 00:00:00 2001 From: "Alexandre A. 
Muller" Date: Thu, 29 Oct 2020 19:08:55 +0000 Subject: [PATCH 240/282] add link to VHDL parser --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 03c1a60cc5..d9410cc276 100644 --- a/docs/index.md +++ b/docs/index.md @@ -50,6 +50,7 @@ Parsers for these languages are fairly complete: * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) +* [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) From d62e7f7d75f0417c0e1c35a9548031d16b31328e Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Thu, 29 Oct 2020 19:02:30 +0100 Subject: [PATCH 241/282] Add test case with extra_symbols --- cli/src/generate/node_types.rs | 115 +++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index a575d19743..9fb1fe8d0b 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -730,9 +730,114 @@ mod tests { kind: VariableType::Named, rule: Rule::string("x"), }, + // This rule is not reachable from the start symbol + // so it won't be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!(node_types.len(), 3); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: 
";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + } + + #[test] + fn test_node_types_simple_extras() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: vec![Rule::named("v3")], + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + // This rule is not reachable from the start symbol, but + // it is reachable from the 'extra_symbols' so it + // should be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, ], }); + assert_eq!(node_types.len(), 4); + assert_eq!( node_types[0], NodeInfoJSON { @@ -790,6 +895,16 @@ mod tests { fields: None } ); + assert_eq!( + node_types[3], + NodeInfoJSON { + kind: "v3".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); } #[test] From 505695040d9443e17d53bab4e39b498a8405e468 Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Fri, 30 Oct 2020 06:57:04 -0600 Subject: [PATCH 242/282] Update TS definitions to support the Query API --- lib/binding_web/tree-sitter-web.d.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 
7ddae95216..6958a9bfa1 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -37,7 +37,7 @@ declare module 'web-tree-sitter' { export type Logger = ( message: string, - params: {[param: string]: string}, + params: { [param: string]: string }, type: "parse" | "lex" ) => void; @@ -131,8 +131,15 @@ declare module 'web-tree-sitter' { readonly version: number; readonly fieldCount: number; - fieldNameForId(fieldId: number): string | null - fieldIdForName(fieldName: string): number | null + fieldNameForId(fieldId: number): string | null; + fieldIdForName(fieldName: string): number | null; + query(source: string): Query; + } + + class Query { + delete(): void; + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point); + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point); } } From a7a6139e70ea182ebf09aef8413d9916e20d9afc Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Fri, 30 Oct 2020 10:20:12 -0600 Subject: [PATCH 243/282] Add additional Query API typedefs based on Elm Language Server's overrides --- lib/binding_web/tree-sitter-web.d.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 6958a9bfa1..ae76e80300 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -48,6 +48,7 @@ declare module 'web-tree-sitter' { ) => string | null; export interface SyntaxNode { + id: number; tree: Tree; type: string; isNamed: boolean; @@ -136,10 +137,23 @@ declare module 'web-tree-sitter' { query(source: string): Query; } + interface QueryResult { + pattern: number; + captures: { name: string; node: SyntaxNode }[]; + } + + interface PredicateResult { + operator: string; + operands: { name: string; type: string }[]; + } + class Query { + captureNames: string[]; + delete(): void; - matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point); - 
captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point); + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + predicatesForPattern(patternIndex: number): PredicateResult[]; } } From 3497f34dd78b960ef30f2aa18b3d03fc517a1a84 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 13:43:28 -0800 Subject: [PATCH 244/282] Fix parser-generation bugs introduced in #782 --- cli/src/generate/node_types.rs | 33 ++++++------ cli/src/generate/render.rs | 93 ++++++++++++++++------------------ 2 files changed, 62 insertions(+), 64 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 9fb1fe8d0b..bc5a836fab 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -146,7 +146,7 @@ impl ChildQuantity { pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, ) -> Result> { let child_type_is_visible = |t: &ChildType| { variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous @@ -185,7 +185,7 @@ pub(crate) fn get_variable_info( let child_symbol = step.symbol; let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) - } else if let Some(alias) = simple_aliases.get(&step.symbol) { + } else if let Some(alias) = default_aliases.get(&step.symbol) { ChildType::Aliased(alias.clone()) } else { ChildType::Normal(child_symbol) @@ -358,7 +358,7 @@ pub(crate) fn get_variable_info( pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, variable_info: &Vec, ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json( named: alias.is_named, }, 
ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -417,7 +417,7 @@ pub(crate) fn generate_node_types_json( }; let mut aliases_by_symbol = HashMap::new(); - for (symbol, alias) in simple_aliases { + for (symbol, alias) in default_aliases { aliases_by_symbol.insert(*symbol, { let mut aliases = HashSet::new(); aliases.insert(Some(alias.clone())); @@ -425,7 +425,7 @@ pub(crate) fn generate_node_types_json( }); } for extra_symbol in &syntax_grammar.extra_symbols { - if !simple_aliases.contains_key(extra_symbol) { + if !default_aliases.contains_key(extra_symbol) { aliases_by_symbol .entry(*extra_symbol) .or_insert(HashSet::new()) @@ -435,12 +435,15 @@ pub(crate) fn generate_node_types_json( for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { - if !simple_aliases.contains_key(&step.symbol) { - aliases_by_symbol - .entry(step.symbol) - .or_insert(HashSet::new()) - .insert(step.alias.clone()); - } + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); } } } @@ -1808,14 +1811,14 @@ mod tests { } fn get_node_types(grammar: InputGrammar) -> Vec { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); let variable_info = - get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap(); + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 04f9e47b79..58d99cc452 100644 --- a/cli/src/generate/render.rs +++ 
b/cli/src/generate/render.rs @@ -143,49 +143,6 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); - for production_info in &self.parse_table.production_infos { - for field_name in production_info.field_map.keys() { - field_names.push(field_name); - } - - for alias in &production_info.alias_sequence { - if let Some(alias) = &alias { - let alias_kind = alias.kind(); - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind - }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - self.alias_ids.entry(alias.clone()).or_insert(alias_id); - } - } - } - - self.unique_aliases = self - .alias_ids - .keys() - .filter(|alias| { - self.parse_table - .symbols - .iter() - .cloned() - .find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias.kind() - }) - .is_none() - }) - .cloned() - .collect(); - self.unique_aliases.sort_unstable(); - self.symbol_map = self .parse_table .symbols @@ -230,13 +187,51 @@ impl Generator { }) .collect(); - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names + for field_name in production_info.field_map.keys() { + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } + } + + for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. 
+ if let Some(alias) = &alias { + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } + }); + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. + else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + } + } + } - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". Otherwise, use the same representation for all parse - // states. + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. 
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); self.large_state_count = self .parse_table From 99cd283e39d8dfb766fb365262fd08a419dd20a2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 14:07:39 -0800 Subject: [PATCH 245/282] query: Fix detection of repeated field names Fixes #790 --- cli/src/tests/query_test.rs | 27 +++++++++++++++++++++++++++ lib/src/tree_cursor.c | 14 +++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 6bf6cbb001..082686ac9a 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1835,6 +1835,33 @@ fn test_query_matches_with_no_captures() { }); } +#[test] +fn test_query_matches_with_repeated_fields() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + "(field_declaration declarator: (field_identifier) @field)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + struct S { + int a, b, c; + } + ", + &[ + (0, vec![("field", "a")]), + (0, vec![("field", "b")]), + (0, vec![("field", "c")]), + ], + ); + }); +} + #[test] fn test_query_captures_basic() { allocations::record(|| { diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 8af44a343b..98b8660584 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -330,7 +330,7 @@ void ts_tree_cursor_current_status( } } - #undef subtree_metadata + #undef subtree_symbol if (!ts_subtree_extra(*entry->subtree)) { const TSFieldMapEntry *field_map, *field_map_end; @@ -345,7 +345,6 @@ void ts_tree_cursor_current_status( for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { if (!i->inherited && i->child_index == entry->structural_child_index) { *field_id = i->field_id; - *can_have_later_siblings_with_this_field = false; break; } } @@ -354,9 +353,14 @@ void ts_tree_cursor_current_status( // Determine if the current node can have later siblings 
with the same field name. if (*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == *field_id && i->child_index > entry->structural_child_index) { - *can_have_later_siblings_with_this_field = true; - break; + if (i->field_id == *field_id) { + if ( + i->child_index > entry->structural_child_index || + (i->child_index == entry->structural_child_index && *has_later_named_siblings) + ) { + *can_have_later_siblings_with_this_field = true; + break; + } } } } From c439a676cf169e88234f768ca0f69d42e5bd68c5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 14:53:01 -0800 Subject: [PATCH 246/282] 0.17.3 --- Cargo.lock | 2 +- cli/Cargo.toml | 2 +- cli/npm/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d052511d76..2c2439738e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -832,7 +832,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.2" +version = "0.17.3" dependencies = [ "ansi_term", "atty", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a81ce16c89..48dbbff7b6 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.17.2" +version = "0.17.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff --git a/cli/npm/package.json b/cli/npm/package.json index f327698cbf..4c6dfe9027 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.2", + "version": "0.17.3", "author": "Max Brunsfeld", "license": "MIT", "repository": { From 281e75d74d78b0cbb6441bf497fdef0988ab49e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 2 Nov 2020 20:53:08 -0800 Subject: [PATCH 247/282] rust binding: 0.17.1 --- Cargo.lock | 2 +- lib/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock 
b/Cargo.lock index 2c2439738e..cd411095a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -824,7 +824,7 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.17.0" +version = "0.17.1" dependencies = [ "cc", "regex", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 2d13278896..8f88966f98 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.17.0" +version = "0.17.1" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" From 2f897b4d7333cb18dc1b62408cd38dad839f4789 Mon Sep 17 00:00:00 2001 From: Stafford Brunk Date: Tue, 3 Nov 2020 08:20:20 -0700 Subject: [PATCH 248/282] Change QueryResult to be QueryCapture and QueryMatch matches/captures return 2 different types of object so this change corrects the return types --- lib/binding_web/tree-sitter-web.d.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index ae76e80300..80084c1135 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -137,9 +137,14 @@ declare module 'web-tree-sitter' { query(source: string): Query; } - interface QueryResult { + interface QueryCapture { + name: string; + node: SyntaxNode; + } + + interface QueryMatch { pattern: number; - captures: { name: string; node: SyntaxNode }[]; + captures: QueryCapture[]; } interface PredicateResult { @@ -151,8 +156,8 @@ declare module 'web-tree-sitter' { captureNames: string[]; delete(): void; - matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; - captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryResult[]; + matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryMatch[]; + captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryCapture[]; 
predicatesForPattern(patternIndex: number): PredicateResult[]; } } From 4e86b76e8c0fc8218eff7dcaa2029cf778f260a7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 3 Nov 2020 10:28:17 -0800 Subject: [PATCH 249/282] Update ruby error recovery test to reflect grammar tweaks --- test/fixtures/error_corpus/readme.md | 8 ++++++++ test/fixtures/error_corpus/ruby_errors.txt | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test/fixtures/error_corpus/readme.md diff --git a/test/fixtures/error_corpus/readme.md b/test/fixtures/error_corpus/readme.md new file mode 100644 index 0000000000..d8b5da09d8 --- /dev/null +++ b/test/fixtures/error_corpus/readme.md @@ -0,0 +1,8 @@ +The Error Corpus +================ + +This directory contains corpus tests that exercise error recovery in a variety of languages. + +These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error. + +Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree. 
diff --git a/test/fixtures/error_corpus/ruby_errors.txt b/test/fixtures/error_corpus/ruby_errors.txt index 9c35781c03..49dc2b32da 100644 --- a/test/fixtures/error_corpus/ruby_errors.txt +++ b/test/fixtures/error_corpus/ruby_errors.txt @@ -14,6 +14,6 @@ c method: (identifier) (ERROR (heredoc_beginning)) arguments: (argument_list - (heredoc_body (heredoc_end)) + (heredoc_body (heredoc_content) (heredoc_end)) (identifier) (MISSING ")")))) From 521297fdfe2e466bd0c7e81ac687b15431ffb496 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 10 Nov 2020 16:19:17 -0500 Subject: [PATCH 250/282] remove testing file --- test/fixtures/queries/python.py | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 test/fixtures/queries/python.py diff --git a/test/fixtures/queries/python.py b/test/fixtures/queries/python.py deleted file mode 100644 index 01ec9ab068..0000000000 --- a/test/fixtures/queries/python.py +++ /dev/null @@ -1,7 +0,0 @@ -def foo(): pass -# <- definition.function - -def bar(): -# <- definition.function - foo() - # <- reference.call From 50bccdf5dad00681cdcbd3d1275c40f6689ac7f1 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 10 Nov 2020 16:20:51 -0500 Subject: [PATCH 251/282] rename Assertion.expected to expected_capture_name --- cli/src/query_testing.rs | 10 ++++++---- cli/src/test_highlight.rs | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 8c6af706b2..fe4ec8a13f 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -19,7 +19,7 @@ pub struct CaptureInfo { #[derive(Debug, PartialEq, Eq)] pub struct Assertion { pub position: Point, - pub expected: String, + pub expected_capture_name: String, } /// Parse the given source code, finding all of the comments that contain @@ -81,7 +81,7 @@ pub fn parse_position_comments( assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { position: position, - expected: 
mat.as_str().to_string(), + expected_capture_name: mat.as_str().to_string(), }); } } @@ -135,8 +135,10 @@ pub fn assert_expected_captures( let contents = fs::read_to_string(path)?; let pairs = parse_position_comments(parser, language, contents.as_bytes())?; - let per_position_index: HashMap = - pairs.iter().map(|a| (a.position, &a.expected)).collect(); + let per_position_index: HashMap = pairs + .iter() + .map(|a| (a.position, &a.expected_capture_name)) + .collect(); for info in &infos { if !per_position_index.contains_key(&info.position) { diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 55a709aa4c..2517ea3c42 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -100,7 +100,7 @@ pub fn test_highlight( let mut actual_highlights = Vec::<&String>::new(); for Assertion { position, - expected: expected_highlight, + expected_capture_name: expected_highlight, } in &assertions { let mut passed = false; From 4604b40b72db3c7b52f3c6034ffd8527a5c5cbe4 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Tue, 10 Nov 2020 16:23:39 -0500 Subject: [PATCH 252/282] better name for capture regex --- cli/src/query_testing.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index fe4ec8a13f..58feec4283 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -7,7 +7,7 @@ use std::fs; use tree_sitter::{Language, Parser, Point}; lazy_static! { - static ref PROPERTY_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); + static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); } #[derive(Debug, Eq, PartialEq)] @@ -76,7 +76,7 @@ pub fn parse_position_comments( // If the comment node contains an arrow and a highlight name, record the // highlight name and the position. 
if let (true, Some(mat)) = - (has_arrow, PROPERTY_NAME_REGEX.find(&text[arrow_end..])) + (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..])) { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { From bf41088bd1265159a34007e47e11a482ce3f8d9a Mon Sep 17 00:00:00 2001 From: Darkhan Kubigenov Date: Fri, 13 Nov 2020 19:01:02 +0600 Subject: [PATCH 253/282] tags: fix compilation on aarch64 Fixes #798 --- tags/src/c_lib.rs | 7 ++++--- tags/src/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 85de1ff6e0..8f689a9a8f 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,6 +1,7 @@ use super::{Error, TagsConfiguration, TagsContext}; use std::collections::HashMap; use std::ffi::CStr; +use std::os::raw::c_char; use std::process::abort; use std::sync::atomic::AtomicUsize; use std::{fmt, slice, str}; @@ -73,7 +74,7 @@ pub extern "C" fn ts_tagger_delete(this: *mut TSTagger) { #[no_mangle] pub extern "C" fn ts_tagger_add_language( this: *mut TSTagger, - scope_name: *const i8, + scope_name: *const c_char, language: Language, tags_query: *const u8, locals_query: *const u8, @@ -109,7 +110,7 @@ pub extern "C" fn ts_tagger_add_language( #[no_mangle] pub extern "C" fn ts_tagger_tag( this: *mut TSTagger, - scope_name: *const i8, + scope_name: *const c_char, source_code: *const u8, source_code_len: u32, output: *mut TSTagsBuffer, @@ -234,7 +235,7 @@ pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> #[no_mangle] pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, - scope_name: *const i8, + scope_name: *const c_char, len: *mut u32, ) -> *const *const i8 { let tagger = unwrap_mut_ptr(this); diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 12db90cbf2..898090527f 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -5,6 +5,7 @@ use regex::Regex; use std::collections::HashMap; use std::ffi::{CStr, CString}; use 
std::ops::Range; +use std::os::raw::c_char; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{char, fmt, mem, str}; use tree_sitter::{ @@ -230,8 +231,9 @@ impl TagsConfiguration { pub fn syntax_type_name(&self, id: u32) -> &str { unsafe { - let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8) - .to_bytes(); + let cstr = + CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const c_char) + .to_bytes(); str::from_utf8(cstr).expect("syntax type name was not valid utf-8") } } From b267f90e6485c65d4db7360dd5cf6577701c75c2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 16 Nov 2020 10:51:08 -0800 Subject: [PATCH 254/282] Update unit tests to reflect python and ruby grammar changes --- cli/src/tests/node_test.rs | 2 +- cli/src/tests/query_test.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index d4a5a3f98a..7e652cd550 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -739,7 +739,7 @@ fn test_node_numeric_symbols_respect_simple_aliases() { let root = tree.root_node(); assert_eq!( root.to_sexp(), - "(program (binary left: (unary (identifier)) right: (identifier)))", + "(program (binary left: (unary operand: (identifier)) right: (identifier)))", ); let binary_node = root.child(0).unwrap(); diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 082686ac9a..02f222bb54 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1507,7 +1507,7 @@ fn test_query_matches_with_supertypes() { value: (expression) @kw_arg) (assignment - left: (left_hand_side (identifier) @var_def)) + left: (identifier) @var_def) (primary_expression/identifier) @var_ref "#, From f28334a476301b7ed8f33ce1453d6d4ad967f60b Mon Sep 17 00:00:00 2001 From: BonaBeavis Date: Fri, 20 Nov 2020 11:42:33 +0100 Subject: [PATCH 255/282] Add SPARQL parser to docs --- docs/index.md | 1 + 1 file changed, 1 
insertion(+) diff --git a/docs/index.md b/docs/index.md index d9410cc276..eca3f1a994 100644 --- a/docs/index.md +++ b/docs/index.md @@ -62,6 +62,7 @@ Parsers for these languages are in development: * [Julia](https://github.com/tree-sitter/tree-sitter-julia) * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) +* [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) * [Swift](https://github.com/tree-sitter/tree-sitter-swift) ### Talks on Tree-sitter From f3d16f4770336c32b57d9547bd52f92fb7d6a257 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 23 Nov 2020 11:34:56 -0500 Subject: [PATCH 256/282] Fix tests. --- cli/src/test_highlight.rs | 68 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 2517ea3c42..df870bf6d3 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -81,6 +81,72 @@ pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { Ok(()) } } +pub fn iterate_assertions( + assertions: &Vec, + highlights: &Vec<(Point, Point, Highlight)>, + highlight_names: &Vec, +) -> Result { + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. + let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in assertions + { + let mut passed = false; + actual_highlights.clear(); + + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. + if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. 
+ let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } + + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); + } + + j += 1; + } + } else { + break; + } + } + + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), + } + .into()); + } + } + + Ok(assertions.len()) +} pub fn test_highlight( loader: &Loader, @@ -94,6 +160,8 @@ pub fn test_highlight( let assertions = parse_position_comments(highlighter.parser(), highlight_config.language, source)?; + iterate_assertions(&assertions, &highlights, &highlight_names)?; + // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. let mut i = 0; From 0b4661e401f6430f56b1ad84467aa596fe7afad9 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 23 Nov 2020 11:41:16 -0500 Subject: [PATCH 257/282] Really fix the tests. 
--- cli/src/tests/test_highlight_test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs index 669208237c..1a658281b0 100644 --- a/cli/src/tests/test_highlight_test.rs +++ b/cli/src/tests/test_highlight_test.rs @@ -33,15 +33,15 @@ fn test_highlight_test_with_basic_test() { &[ Assertion { position: Point::new(0, 5), - expected: "function".to_string() + expected_capture_name: "function".to_string() }, Assertion { position: Point::new(0, 11), - expected: "keyword".to_string() + expected_capture_name: "keyword".to_string() }, Assertion { position: Point::new(3, 9), - expected: "variable.parameter".to_string() + expected_capture_name: "variable.parameter".to_string() }, ] ); From 6764b803a0b93425586615decf6e343cdf1b31b0 Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 23 Nov 2020 11:58:07 -0500 Subject: [PATCH 258/282] Allow overlap in specs. --- cli/src/query.rs | 2 ++ cli/src/query_testing.rs | 30 ++++++++++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cli/src/query.rs b/cli/src/query.rs index 9c524877cf..bf67edf655 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -58,6 +58,7 @@ pub fn query_files_at_paths( results.push(query_testing::CaptureInfo { name: capture_name.to_string(), position: capture.node.start_position(), + terminus: capture.node.end_position(), }); } } else { @@ -85,6 +86,7 @@ pub fn query_files_at_paths( results.push(query_testing::CaptureInfo { name: capture_name.to_string(), position: capture.node.start_position(), + terminus: capture.node.end_position(), }); } } diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 58feec4283..96ccf6b248 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -2,7 +2,6 @@ use crate::error; use crate::error::Result; use lazy_static::lazy_static; use regex::Regex; -use std::collections::hash_map::HashMap; use std::fs; use 
tree_sitter::{Language, Parser, Point}; @@ -14,6 +13,7 @@ lazy_static! { pub struct CaptureInfo { pub name: String, pub position: Point, + pub terminus: Point, } #[derive(Debug, PartialEq, Eq)] @@ -134,22 +134,20 @@ pub fn assert_expected_captures( ) -> Result<()> { let contents = fs::read_to_string(path)?; let pairs = parse_position_comments(parser, language, contents.as_bytes())?; - - let per_position_index: HashMap = pairs - .iter() - .map(|a| (a.position, &a.expected_capture_name)) - .collect(); - for info in &infos { - if !per_position_index.contains_key(&info.position) { - continue; - } - let found = per_position_index.get(&info.position).unwrap(); - if **found != info.name && info.name != "name" { - Err(error::Error::new(format!( - "Assertion failed: at {}, found {}, expected {}", - info.position, found, info.name - )))? + let found = pairs.iter().find(|p| { + p.position.row == info.position.row + && p.position >= info.position + && p.position < info.terminus + }); + + if let Some(found) = found { + if found.expected_capture_name != info.name && info.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + info.position, found.expected_capture_name, info.name + )))? + } } } Ok(()) From e1da6e554bf9235f613f44baeb5496663a6c12df Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 23 Nov 2020 12:01:08 -0500 Subject: [PATCH 259/282] Remove fanciful nomenclature. 
--- cli/src/query.rs | 8 ++++---- cli/src/query_testing.rs | 10 ++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/cli/src/query.rs b/cli/src/query.rs index bf67edf655..485fdb82c3 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -57,8 +57,8 @@ pub fn query_files_at_paths( )?; results.push(query_testing::CaptureInfo { name: capture_name.to_string(), - position: capture.node.start_position(), - terminus: capture.node.end_position(), + start: capture.node.start_position(), + end: capture.node.end_position(), }); } } else { @@ -85,8 +85,8 @@ pub fn query_files_at_paths( } results.push(query_testing::CaptureInfo { name: capture_name.to_string(), - position: capture.node.start_position(), - terminus: capture.node.end_position(), + start: capture.node.start_position(), + end: capture.node.end_position(), }); } } diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 96ccf6b248..2a9a8c2dd7 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -12,8 +12,8 @@ lazy_static! { #[derive(Debug, Eq, PartialEq)] pub struct CaptureInfo { pub name: String, - pub position: Point, - pub terminus: Point, + pub start: Point, + pub end: Point, } #[derive(Debug, PartialEq, Eq)] @@ -136,16 +136,14 @@ pub fn assert_expected_captures( let pairs = parse_position_comments(parser, language, contents.as_bytes())?; for info in &infos { let found = pairs.iter().find(|p| { - p.position.row == info.position.row - && p.position >= info.position - && p.position < info.terminus + p.position.row == info.start.row && p.position >= info.start && p.position < info.end }); if let Some(found) = found { if found.expected_capture_name != info.name && info.name != "name" { Err(error::Error::new(format!( "Assertion failed: at {}, found {}, expected {}", - info.position, found.expected_capture_name, info.name + info.start, found.expected_capture_name, info.name )))? 
} } From cc8f978b3b4007975f7a6f2a9a43dd5bc5f8ec4b Mon Sep 17 00:00:00 2001 From: Patrick Thomson Date: Mon, 23 Nov 2020 12:05:32 -0500 Subject: [PATCH 260/282] inline this lambda --- cli/src/query_testing.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 2a9a8c2dd7..ef02ec69e2 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -135,11 +135,9 @@ pub fn assert_expected_captures( let contents = fs::read_to_string(path)?; let pairs = parse_position_comments(parser, language, contents.as_bytes())?; for info in &infos { - let found = pairs.iter().find(|p| { + if let Some(found) = pairs.iter().find(|p| { p.position.row == info.start.row && p.position >= info.start && p.position < info.end - }); - - if let Some(found) = found { + }) { if found.expected_capture_name != info.name && info.name != "name" { Err(error::Error::new(format!( "Assertion failed: at {}, found {}, expected {}", From 11e7b108f221eb8ac3827a77d8e9dc9ed3dd67b2 Mon Sep 17 00:00:00 2001 From: Luis Hagenauer Date: Tue, 24 Nov 2020 12:35:38 +0100 Subject: [PATCH 261/282] highlight: Make README snippets compile --- highlight/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/highlight/README.md b/highlight/README.md index ae462f9d8b..07edc421c0 100644 --- a/highlight/README.md +++ b/highlight/README.md @@ -17,7 +17,7 @@ extern "C" tree_sitter_javascript(); Define the list of highlight names that you will recognize: ```rust -let highlight_names = [ +let highlight_names : Vec = [ "attribute", "constant", "function.builtin", @@ -93,14 +93,14 @@ let highlights = highlighter.highlight( ).unwrap(); for event in highlights { - match event? 
{ + match event.unwrap() { HighlightEvent::Source {start, end} => { eprintln!("source: {}-{}", start, end); }, - HighlightEvent::HighlightStart(s) { + HighlightEvent::HighlightStart(s) => { eprintln!("highlight style started: {:?}", s); }, - HighlightEvent::HighlightEnd { + HighlightEvent::HighlightEnd => { eprintln!("highlight style ended"); }, } From 7ef73b2e085acd8f45bbf998fea84c756a05674f Mon Sep 17 00:00:00 2001 From: Joel Spadin Date: Thu, 26 Nov 2020 14:43:27 -0600 Subject: [PATCH 262/282] web binding: fix equals() Node.equals() used to always return true. Now it works. Also added unit tests for it. --- lib/binding_web/binding.js | 6 +----- lib/binding_web/test/node-test.js | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index f731e8f868..15b0711677 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -258,11 +258,7 @@ class Node { } equals(other) { - if (this === other) return true; - for (let i = 0; i < 5; i++) { - if (this[i] !== other[i]) return false; - } - return true; + return this.id === other.id; } child(index) { diff --git a/lib/binding_web/test/node-test.js b/lib/binding_web/test/node-test.js index 933ff38f67..6bbcafb00a 100644 --- a/lib/binding_web/test/node-test.js +++ b/lib/binding_web/test/node-test.js @@ -388,4 +388,24 @@ describe("Node", () => { assert.throws(() => number.closest({a: 1}), /Argument must be a string or array of strings/) }); }); + + describe('.equals(other)', () => { + it('returns true if the nodes are the same', () => { + tree = parser.parse('1 + 2'); + + const sumNode = tree.rootNode.firstChild.firstChild; + const node1 = sumNode.firstChild; + const node2 = sumNode.firstChild; + assert(node1.equals(node2)); + }); + + it('returns false if the nodes are not the same', () => { + tree = parser.parse('1 + 2'); + + const sumNode = tree.rootNode.firstChild.firstChild; + const node1 = sumNode.firstChild; + const 
node2 = node1.nextSibling; + assert(!node1.equals(node2)); + }); + }); }); From a2d6048226ceb1b09a7e4cf330d75ced0d3f27a3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 14:28:27 -0800 Subject: [PATCH 263/282] Get the wasm build working w/ latest emscripten --- lib/binding_web/binding.c | 27 +++++++++++++++++---------- lib/binding_web/binding.js | 2 +- lib/binding_web/imports.js | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index eb463b26cd..9180f4059f 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -115,18 +115,10 @@ extern void tree_sitter_parse_callback( ); extern void tree_sitter_log_callback( - void *payload, - TSLogType log_type, + bool is_lex_message, const char *message ); -void ts_parser_new_wasm() { - TSParser *parser = ts_parser_new(); - char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char)); - TRANSFER_BUFFER[0] = parser; - TRANSFER_BUFFER[1] = input_buffer; -} - static const char *call_parse_callback( void *payload, uint32_t byte, @@ -148,8 +140,23 @@ static const char *call_parse_callback( return buffer; } +static void call_log_callback( + void *payload, + TSLogType log_type, + const char *message +) { + tree_sitter_log_callback(log_type == TSLogTypeLex, message); +} + +void ts_parser_new_wasm() { + TSParser *parser = ts_parser_new(); + char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char)); + TRANSFER_BUFFER[0] = parser; + TRANSFER_BUFFER[1] = input_buffer; +} + void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) { - TSLogger logger = {self, should_log ? tree_sitter_log_callback : NULL}; + TSLogger logger = {self, should_log ? 
call_log_callback : NULL}; ts_parser_set_logger(self, logger); } diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 15b0711677..95bfa82886 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -880,7 +880,7 @@ class Language { } return bytes - .then(bytes => loadWebAssemblyModule(bytes, {loadAsync: true})) + .then(bytes => loadSideModule(bytes, {loadAsync: true})) .then(mod => { const symbolNames = Object.keys(mod) const functionName = symbolNames.find(key => diff --git a/lib/binding_web/imports.js b/lib/binding_web/imports.js index ea34926f01..a76c42ac82 100644 --- a/lib/binding_web/imports.js +++ b/lib/binding_web/imports.js @@ -16,7 +16,7 @@ mergeInto(LibraryManager.library, { } }, - tree_sitter_log_callback: function(_payload, isLexMessage, messageAddress) { + tree_sitter_log_callback: function(isLexMessage, messageAddress) { if (currentLogCallback) { const message = UTF8ToString(messageAddress); currentLogCallback(message, isLexMessage !== 0); From b118e7d7505d5f5621b7cf269a1b155e6f20588e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 15:28:21 -0800 Subject: [PATCH 264/282] Make binding.js syntactically valid Put the end of the surrounding closure into a separate file, suffix.js. 
--- lib/binding_web/binding.js | 5 ----- lib/binding_web/suffix.js | 2 ++ script/build-wasm | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) create mode 100644 lib/binding_web/suffix.js diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 95bfa82886..b060715f08 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -7,7 +7,6 @@ const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT; const ZERO_POINT = {row: 0, column: 0}; const QUERY_WORD_REGEX = /[\w-.]*/g; -const PREDICATE_STEP_TYPE_DONE = 0; const PREDICATE_STEP_TYPE_CAPTURE = 1; const PREDICATE_STEP_TYPE_STRING = 2; @@ -1140,7 +1139,3 @@ function marshalEdit(edit) { } Parser.Language = Language; - -return Parser; - -})); diff --git a/lib/binding_web/suffix.js b/lib/binding_web/suffix.js new file mode 100644 index 0000000000..0e9fe0217a --- /dev/null +++ b/lib/binding_web/suffix.js @@ -0,0 +1,2 @@ +return Parser; +})); diff --git a/script/build-wasm b/script/build-wasm index 63ec4fe0a0..b139f6c25f 100755 --- a/script/build-wasm +++ b/script/build-wasm @@ -95,6 +95,7 @@ $emcc \ --js-library ${web_dir}/imports.js \ --pre-js ${web_dir}/prefix.js \ --post-js ${web_dir}/binding.js \ + --post-js ${web_dir}/suffix.js \ lib/src/lib.c \ ${web_dir}/binding.c \ -o target/scratch/tree-sitter.js From 751ffd2ee13ef7b29de60585fec3a52dab1f5b4e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 16:25:01 -0800 Subject: [PATCH 265/282] Use new emscripten when building with docker --- cli/src/wasm.rs | 2 +- script/build-wasm | 10 +++++----- script/generate-fixtures-wasm | 8 +++++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs index 47cea90ad2..8bbcfbdf46 100644 --- a/cli/src/wasm.rs +++ b/cli/src/wasm.rs @@ -57,7 +57,7 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu } // Run `emcc` in a container using the `emscripten-slim` image - command.args(&["trzeci/emscripten-slim", 
"emcc"]); + command.args(&["emscripten/emsdk", "emcc"]); } else { return Error::err( "You must have either emcc or docker on your PATH to run this command".to_string(), diff --git a/script/build-wasm b/script/build-wasm index b139f6c25f..75c6a7d17c 100755 --- a/script/build-wasm +++ b/script/build-wasm @@ -64,11 +64,11 @@ emcc= if which emcc > /dev/null && [[ "$force_docker" == "0" ]]; then emcc=emcc elif which docker > /dev/null; then - emcc="docker run \ - --rm \ - -v $(pwd):/src:Z \ - -u $(id -u) \ - trzeci/emscripten-slim \ + emcc="docker run \ + --rm \ + -v $(pwd):/src:Z \ + -u $(id -u) \ + emscripten/emsdk \ emcc" else echo 'You must have either `docker` or `emcc` on your PATH to run this script' diff --git a/script/generate-fixtures-wasm b/script/generate-fixtures-wasm index a987e31a5e..9d44b58cbc 100755 --- a/script/generate-fixtures-wasm +++ b/script/generate-fixtures-wasm @@ -4,6 +4,12 @@ set -e cargo build --release +build_wasm_args= +if [[ $1 == "--docker" ]]; then + build_wasm_args="--docker" + shift +fi + filter_grammar_name=$1 root_dir=$PWD @@ -20,7 +26,7 @@ while read -r grammar_file; do fi echo "Compiling ${grammar_name} parser to wasm" - "$tree_sitter" build-wasm $grammar_dir + "$tree_sitter" build-wasm $build_wasm_args $grammar_dir done <<< "$grammar_files" mv tree-sitter-*.wasm target/release/ From 18980b7b99757e4ffa262a49501ae07ad7a8d986 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 16:25:17 -0800 Subject: [PATCH 266/282] wasm: Avoid registering uncaught exception/rejection handlers --- script/build-wasm | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/script/build-wasm b/script/build-wasm index 75c6a7d17c..1b5e48ec47 100755 --- a/script/build-wasm +++ b/script/build-wasm @@ -30,7 +30,6 @@ EOF set -e web_dir=lib/binding_web -exports=$(cat ${web_dir}/exports.json) emscripten_flags="-O3" minify_js=1 force_docker=0 @@ -79,25 +78,27 @@ mkdir -p target/scratch # Use 
emscripten to generate `tree-sitter.js` and `tree-sitter.wasm` # in the `target/scratch` directory -$emcc \ - -s WASM=1 \ - -s TOTAL_MEMORY=33554432 \ - -s ALLOW_MEMORY_GROWTH=1 \ - -s MAIN_MODULE=2 \ - -s NO_FILESYSTEM=1 \ - -s "EXPORTED_FUNCTIONS=${exports}" \ - $emscripten_flags \ - -std=c99 \ - -D 'fprintf(...)=' \ - -D NDEBUG= \ - -I lib/src \ - -I lib/include \ - --js-library ${web_dir}/imports.js \ - --pre-js ${web_dir}/prefix.js \ - --post-js ${web_dir}/binding.js \ - --post-js ${web_dir}/suffix.js \ - lib/src/lib.c \ - ${web_dir}/binding.c \ +$emcc \ + -s WASM=1 \ + -s TOTAL_MEMORY=33554432 \ + -s ALLOW_MEMORY_GROWTH=1 \ + -s MAIN_MODULE=2 \ + -s NO_FILESYSTEM=1 \ + -s NODEJS_CATCH_EXIT=0 \ + -s NODEJS_CATCH_REJECTION=0 \ + -s EXPORTED_FUNCTIONS=@${web_dir}/exports.json \ + $emscripten_flags \ + -std=c99 \ + -D 'fprintf(...)=' \ + -D NDEBUG= \ + -I lib/src \ + -I lib/include \ + --js-library ${web_dir}/imports.js \ + --pre-js ${web_dir}/prefix.js \ + --post-js ${web_dir}/binding.js \ + --post-js ${web_dir}/suffix.js \ + lib/src/lib.c \ + ${web_dir}/binding.c \ -o target/scratch/tree-sitter.js # Use terser to write a minified version of `tree-sitter.js` into From 2699c01ab1c588da81f9d86c97488876c0a0b6c4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 16:45:45 -0800 Subject: [PATCH 267/282] Use latest emscripten on CI --- script/fetch-emscripten | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/fetch-emscripten b/script/fetch-emscripten index d16c857e04..c1b072ad14 100755 --- a/script/fetch-emscripten +++ b/script/fetch-emscripten @@ -2,7 +2,7 @@ set -e -EMSCRIPTEN_VERSION=1.39.15 +EMSCRIPTEN_VERSION=2.0.9 mkdir -p target EMSDK_DIR="./target/emsdk" From 591a2c62495ca81109e061846a7a6eb8e66ecfac Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 16:46:02 -0800 Subject: [PATCH 268/282] Remove web binding paths from travis config --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/.travis.yml b/.travis.yml index 7205ae03f1..79d84d135d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,8 +58,6 @@ deploy: file_glob: true file: - "tree-sitter-*.gz" - - "target/release/tree-sitter.js" - - "target/release/tree-sitter.wasm" draft: true overwrite: true skip_cleanup: true From d3f30e298b9caed7058c95e9535f5a33a4be6648 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 30 Nov 2020 20:36:36 -0800 Subject: [PATCH 269/282] Use node 12 on travis --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 79d84d135d..282ba02d35 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,8 @@ matrix: before_install: # Install node - - nvm install 10 - - nvm use 10 + - nvm install 12 + - nvm use 12 # Download emscripten and create a shorthand for adding it to the PATH. # Don't add it to the path globally because it overrides the default From 96f259d8c56b2cf567de042b416445f1ad2634a2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Dec 2020 09:48:20 -0800 Subject: [PATCH 270/282] Run rustfmt --- cli/src/test.rs | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index 50c272201f..c8cfe89f30 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -77,7 +77,15 @@ pub fn run_tests_at_path( let mut failures = Vec::new(); let mut corrected_entries = Vec::new(); - run_tests(&mut parser, test_entry, filter, 0, &mut failures, update, &mut corrected_entries)?; + run_tests( + &mut parser, + test_entry, + filter, + 0, + &mut failures, + update, + &mut corrected_entries, + )?; if failures.len() > 0 { println!(""); @@ -210,7 +218,11 @@ fn run_tests( failures.push((name, actual, output)); } } - TestEntry::Group { name, children, file_path } => { + TestEntry::Group { + name, + children, + file_path, + } => { if indent_level > 0 { for _ in 0..indent_level { print!(" "); @@ -222,7 +234,15 @@ fn run_tests( indent_level += 
1; for child in children { - run_tests(parser, child, filter, indent_level, failures, update, corrected_entries)?; + run_tests( + parser, + child, + filter, + indent_level, + failures, + update, + corrected_entries, + )?; } if let Some(file_path) = file_path { @@ -292,7 +312,7 @@ fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String } fn write_tests_to_buffer( - buffer: &mut Write, + buffer: &mut impl Write, corrected_entries: &Vec<(String, String, String)>, ) -> Result<()> { for (i, (name, input, output)) in corrected_entries.iter().enumerate() { @@ -328,7 +348,11 @@ pub fn parse_tests(path: &Path) -> io::Result { children.push(parse_tests(&entry.path())?); } } - Ok(TestEntry::Group { name, children, file_path: None }) + Ok(TestEntry::Group { + name, + children, + file_path: None, + }) } else { let content = fs::read_to_string(path)?; Ok(parse_test_content(name, content, Some(path.to_path_buf()))) @@ -390,7 +414,11 @@ fn parse_test_content(name: String, content: String, file_path: Option) .to_string(); prev_header_end = header_end; } - TestEntry::Group { name, children, file_path } + TestEntry::Group { + name, + children, + file_path, + } } #[cfg(test)] From 94c61de35358bacab8251c47081b85ae84d9b86c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 2 Dec 2020 16:12:01 -0800 Subject: [PATCH 271/282] Update JS error recovery test to reflect grammar change --- test/fixtures/error_corpus/javascript_errors.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index ad71037c7e..4359ae6851 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -148,7 +148,8 @@ const h = `i ${j(k} l` (lexical_declaration (variable_declarator (identifier) - (template_string (template_substitution (identifier) (ERROR))))) + (template_string (template_substitution + 
(augmented_assignment_expression (identifier) (MISSING identifier)))))) (lexical_declaration (variable_declarator (identifier) From 5008700735be65bb81cf3de5b292708778c3562a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 4 Dec 2020 14:36:28 -0800 Subject: [PATCH 272/282] wasm: Look for both loadWebAssemblyModule and loadSideModule See https://github.com/emscripten-core/emscripten/pull/12969 --- lib/binding_web/binding.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index b060715f08..e22d5b5b65 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -878,8 +878,14 @@ class Language { })); } + // emscripten-core/emscripten#12969 + const loadModule = + typeof loadSideModule === 'function' + ? loadSideModule + : loadWebAssemblyModule; + return bytes - .then(bytes => loadSideModule(bytes, {loadAsync: true})) + .then(bytes => loadModule(bytes, {loadAsync: true})) .then(mod => { const symbolNames = Object.keys(mod) const functionName = symbolNames.find(key => From e5ef2f2aa3c6d2cafcdd01e81f0852cb4cd21280 Mon Sep 17 00:00:00 2001 From: Jim Hester Date: Mon, 7 Dec 2020 09:05:03 -0500 Subject: [PATCH 273/282] Add link to R tree sitter grammar --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index eca3f1a994..1293ec4866 100644 --- a/docs/index.md +++ b/docs/index.md @@ -46,6 +46,7 @@ Parsers for these languages are fairly complete: * [Python](https://github.com/tree-sitter/tree-sitter-python) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) +* [R](https://github.com/r-lib/tree-sitter-r) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) From 0f492e4254aab125d5828ed95a506ddb684c9201 Mon Sep 17 00:00:00 2001 From: 
Max Brunsfeld Date: Fri, 11 Dec 2020 13:47:20 -0800 Subject: [PATCH 274/282] Include ts_tree_copy in wasm build Fixes #846 --- lib/binding_web/exports.json | 1 + lib/binding_web/test/tree-test.js | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index 7210515863..d0173f3afd 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -79,6 +79,7 @@ "_ts_query_predicates_for_pattern", "_ts_query_string_count", "_ts_query_string_value_for_id", + "_ts_tree_copy", "_ts_tree_cursor_current_field_id_wasm", "_ts_tree_cursor_current_node_id_wasm", "_ts_tree_cursor_current_node_is_missing_wasm", diff --git a/lib/binding_web/test/tree-test.js b/lib/binding_web/test/tree-test.js index ccb7a830be..8c04e63ea2 100644 --- a/lib/binding_web/test/tree-test.js +++ b/lib/binding_web/test/tree-test.js @@ -323,6 +323,31 @@ describe("Tree", () => { assert(!cursor.gotoParent()); }) }); + + describe(".copy", () => { + it("creates another tree that remains stable if the original tree is edited", () => { + input = 'abc + cde'; + tree = parser.parse(input); + assert.equal( + tree.rootNode.toString(), + "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + ); + + const tree2 = tree.copy(); + ([input, edit] = spliceInput(input, 3, 0, '123')); + assert.equal(input, 'abc123 + cde'); + tree.edit(edit); + + const leftNode = tree.rootNode.firstChild.firstChild.firstChild; + const leftNode2 = tree2.rootNode.firstChild.firstChild.firstChild; + const rightNode = tree.rootNode.firstChild.firstChild.lastChild; + const rightNode2 = tree2.rootNode.firstChild.firstChild.lastChild; + assert.equal(leftNode.endIndex, 6) + assert.equal(leftNode2.endIndex, 3) + assert.equal(rightNode.startIndex, 9) + assert.equal(rightNode2.startIndex, 6) + }); + }); }); function spliceInput(input, startIndex, lengthRemoved, newText) { From 4336d9c8c50ca700556be2ece99bf9d4d743c13f 
Mon Sep 17 00:00:00 2001 From: Henrique Oliveira Pinto Date: Fri, 11 Dec 2020 17:57:15 -0500 Subject: [PATCH 275/282] Update binding_web/README.md releases link The previous link pointed to a specific version and it made me think that the project had no releases for a year! --- lib/binding_web/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index ba1b4cb6fa..dc0d44bafc 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -7,7 +7,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/tag/0.14.7) and load them using a standalone script: +You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases) and load them using a standalone script: ```html