diff --git a/.gitignore b/.gitignore
index 03522cc01b..b7413ac159 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ docs/assets/js/tree-sitter.js
 /target
 *.rs.bk
 *.a
+*.dylib
 *.o
 *.obj
 *.exp
diff --git a/.travis.yml b/.travis.yml
index 0923033a06..282ba02d35 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,9 @@ language: rust
 rust:
 - stable
 
+env:
+  CFLAGS="-Wall -Wextra -Werror -Wstrict-prototypes"
+
 matrix:
   include:
   - os: osx
@@ -11,8 +14,8 @@ matrix:
 
 before_install:
   # Install node
-  - nvm install 10
-  - nvm use 10
+  - nvm install 12
+  - nvm use 12
 
   # Download emscripten and create a shorthand for adding it to the PATH.
   # Don't add it to the path globally because it overrides the default
@@ -23,6 +26,9 @@
 script:
   # Build the WASM binding
   - (eval "$WASM_ENV" && script/build-wasm)
 
+  # build the shared/static libraries
+  - make
+
   # Build the CLI
   - cargo build --release
@@ -32,7 +38,6 @@
   - (eval "$WASM_ENV" && script/generate-fixtures-wasm)
 
   # Run the tests
-  - export TREE_SITTER_STATIC_ANALYSIS=1
   - script/test
   - script/test-wasm
   - script/benchmark
@@ -53,8 +58,6 @@ deploy:
   file_glob: true
   file:
     - "tree-sitter-*.gz"
-    - "target/release/tree-sitter.js"
-    - "target/release/tree-sitter.wasm"
   draft: true
   overwrite: true
   skip_cleanup: true
@@ -65,5 +68,3 @@ cache:
   cargo: true
   directories:
     - target/emsdk
-    - test/fixtures/grammars
-    - $HOME/.emscripten_cache
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 120000
index 0000000000..4f64371073
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1 @@
+docs/section-6-contributing.md
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index f539e1816e..c13deb131e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,989 +4,1004 @@ name = "aho-corasick" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" dependencies = [ - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "arrayref" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" [[package]] name = "arrayvec" version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" dependencies = [ - "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop", ] [[package]] name = "ascii" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" [[package]] name = "atty" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "termion", + "winapi", ] [[package]] name = "autocfg" version = "0.1.1" source =
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" [[package]] name = "backtrace" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" dependencies = [ - "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys", + "cfg-if", + "libc", + "rustc-demangle", + "winapi", ] [[package]] name = "backtrace-sys" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "libc", ] [[package]] name = "base64" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder", ] [[package]] name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" [[package]] name = "blake2b_simd" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" dependencies = [ - "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayref", + "arrayvec", + "constant_time_eq", ] [[package]] name = "byteorder" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" [[package]] name = "c2-chacha" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", + "ppv-lite86", ] [[package]] name = "cc" -version = "1.0.25" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" [[package]] name = "cfg-if" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" [[package]] name = "chrono" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" dependencies = [ - "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.42 
(registry+https://github.com/rust-lang/crates.io-index)", + "num-integer", + "num-traits", + "time", ] [[package]] name = "chunked_transfer" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" [[package]] name = "clap" version = "2.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", ] [[package]] name = "cloudabi" version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", ] [[package]] name = "constant_time_eq" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" [[package]] name = "crossbeam-utils" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "lazy_static", ] [[package]] name = "difference" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "dirs-sys", ] [[package]] name = "dirs-sys" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "redox_users", + "winapi", ] [[package]] name = "failure" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" dependencies = [ - "backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + 
"backtrace", + "failure_derive", ] [[package]] name = "failure_derive" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "synstructure", ] [[package]] name = "fuchsia-zircon" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags", + "fuchsia-zircon-sys", ] [[package]] name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] name = "getrandom" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", ] [[package]] name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "html-escape" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e" +dependencies = [ + "utf8-width", +] [[package]] name = "idna" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", + "unicode-bidi", + "unicode-normalization", ] [[package]] name = "indexmap" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" [[package]] name = "itoa" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" [[package]] name = "lazy_static" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" [[package]] name = "libc" version = "0.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" [[package]] name = "libloading" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" dependencies = [ - "cc 1.0.25 
(registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "winapi", ] [[package]] name = "lock_api" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" dependencies = [ - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard", ] [[package]] name = "log" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" dependencies = [ - "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] name = "memchr" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" [[package]] name = "num-integer" version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" dependencies = [ - "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits", ] [[package]] name = "num-traits" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" [[package]] name = "once_cell" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" dependencies = [ - "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot", ] [[package]] name = "parking_lot" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" dependencies = [ - "lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lock_api", + "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "rand 0.6.4", + "rustc_version", + "smallvec", + "winapi", ] [[package]] name = "percent-encoding" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" [[package]] name = "ppv-lite86" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" [[package]] name = "proc-macro2" version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid", ] [[package]] name = "quote" version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.3.0", + "rand_hc 0.1.0", + "rand_isaac", + "rand_os", + "rand_pcg", + "rand_xorshift", + "winapi", ] [[package]] name = "rand" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha 0.2.1", + "rand_core 0.5.0", + "rand_hc 0.2.0", ] [[package]] name = "rand_chacha" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" dependencies = [ - "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "rand_core 0.3.0", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core 0.5.0", ] [[package]] name = "rand_core" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" [[package]] name = "rand_core" version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" dependencies = [ - "getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.0", ] [[package]] name = "rand_isaac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rand_os" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cloudabi", + "fuchsia-zircon", + "libc", + "rand_core 0.3.0", + "rdrand", + "winapi", ] [[package]] name = "rand_pcg" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", + "rustc_version", ] [[package]] name = "rand_xorshift" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "rdrand" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" dependencies = [ - "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0", ] [[package]] name = "redox_syscall" version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall", ] [[package]] name = "redox_users" version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" dependencies = [ - "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure", + "rand_os", + "redox_syscall", + "rust-argon2", ] [[package]] name = "regex" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" dependencies = [ - "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", + "utf8-ranges", ] [[package]] name = "regex-syntax" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ucd-util", ] [[package]] name = "remove_dir_all" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" dependencies = [ - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "rust-argon2" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" dependencies = [ - "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", - "blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "base64", + "blake2b_simd", + "crossbeam-utils", ] [[package]] name = "rustc-demangle" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" [[package]] name = "rustc_version" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" dependencies = [ - "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver", ] [[package]] name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" [[package]] name = "scopeguard" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "semver-parser", ] [[package]] name = "semver-parser" version 
= "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" [[package]] name = "serde_derive" version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "serde_json" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" dependencies = [ - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap", + "itoa", + "ryu", + "serde", ] [[package]] name = "smallbitvec" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" [[package]] name = "smallvec" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable", ] [[package]] name = "spin" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" [[package]] name = "strsim" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" [[package]] name = "syn" version = "0.15.22" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] name = "synstructure" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" dependencies = [ - "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "syn", + "unicode-xid", ] [[package]] name = "tempfile" version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" dependencies = [ - "cfg-if 0.1.6 
(registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand 0.6.4", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termion" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "redox_termios", ] [[package]] name = "textwrap" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" dependencies = [ - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width", ] [[package]] name = "thread_local" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] name = "time" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", + "redox_syscall", + "winapi", ] [[package]] name = "tiny_http" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" dependencies = [ - "ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)", - "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "ascii", + "chrono", + "chunked_transfer", + "log", + "url", ] [[package]] name = "tree-sitter" -version = "0.6.3" +version = "0.17.1" dependencies = [ - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cc", + "regex", ] [[package]] name = "tree-sitter-cli" -version = "0.16.5" -dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - 
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", - "smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", - "tree-sitter-highlight 0.1.6", - "tree-sitter-tags 0.1.6", - "webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +version = "0.18.0" +dependencies = [ + "ansi_term", + "atty", + "cc", + "clap", + "difference", + "dirs", + "glob", + "html-escape", + "lazy_static", + "libloading", + "log", + "once_cell", + "rand 0.7.0", + "regex", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "smallbitvec", + "spin", + "tempfile", + "tiny_http", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-tags", + "webbrowser", ] [[package]] name = "tree-sitter-highlight" -version = "0.1.6" +version = "0.3.0" dependencies = [ - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", + "regex", + "tree-sitter", ] [[package]] name = "tree-sitter-tags" -version = "0.1.6" +version = "0.3.0" dependencies = [ - "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tree-sitter 0.6.3", + "memchr", + "regex", + "tree-sitter", ] [[package]] name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" [[package]] name = "unicode-bidi" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" dependencies = [ - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches", ] [[package]] name = "unicode-normalization" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" dependencies = [ - "smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec", ] [[package]] name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" [[package]] name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unreachable" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "void", ] [[package]] name = "url" version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" dependencies = [ - "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "idna", + "matches", + "percent-encoding", ] [[package]] name = "utf8-ranges" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" + +[[package]] +name = "utf8-width" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa" [[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" [[package]] name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "webbrowser" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" dependencies = [ - "widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "widestring", + "winapi", ] [[package]] name = "widestring" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" [[package]] name = "winapi" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" -"checksum arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" -"checksum ascii 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = 
"97be891acc47ca214468e09425d02cef3af2c94d0d82081cd02061f996802f14" -"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" -"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" -"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" -"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" -"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" -"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" -"checksum blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" -"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" -"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" -"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" -"checksum chunked_transfer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "498d20a7aaf62625b9bf26e637cf7736417cde1d0c99f1d04d1170229a85cf87" -"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" -"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" -"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" -"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" -"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" -"checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" -"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" -"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum getrandom 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "34f33de6f0ae7c9cb5e574502a562e2b512799e32abb801cd1e79ad952b62b49" -"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" -"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" -"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" -"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" -"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" -"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" -"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" -"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" -"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" -"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" -"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37" -"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337" -"checksum parking_lot_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94c8c7923936b28d546dfd14d4472eaf34c99b14e1c973a32b3e6d4eb04298c9" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" -"checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" -"checksum proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "77619697826f31a02ae974457af0b29b723e5619e113e9397b8b82c6bd253f09" -"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = 
"53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" -"checksum rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3906503e80ac6cbcacb2c2973fa8e473f24d7e2747c8c92bb230c2441cad96b5" -"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" -"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" -"checksum rand_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "615e683324e75af5d43d8f7a39ffe3ee4a9dc42c5c701167a71dc59c3a493aca" -"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -"checksum rand_os 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f46fbd5550acf75b0c2730f5dd1873751daf9beb8f11b44027778fae50d7feca" -"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" -"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -"checksum redox_syscall 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "679da7508e9a6390aeaf7fbd02a800fdc64b73fe2204dd2c8ae66d22d9d5ad5d" -"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" -"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" -"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" -"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" -"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" -"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf" -"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" -"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = 
"eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" -"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef" -"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c" -"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811" -"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e" -"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" -"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" -"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" -"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" -"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" -"checksum tempfile 3.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b86c784c88d98c801132806dadd3819ed29d8600836c4088e855cdf3e178ed8a" -"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" -"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" -"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" -"checksum tiny_http 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1661fa0a44c95d01604bd05c66732a446c657efb62b5164a7a083a3b552b4951" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" -"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" -"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum webbrowser 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c01efd7cb6939b7f34983f1edff0550e5b21b49e2db4495656295922df8939ac" -"checksum widestring 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effc0e4ff8085673ea7b9b2e3c73f6bd4d118810c9009ed8f1e16bd96c331db6" -"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..764f411a8c --- /dev/null +++ b/Makefile @@ -0,0 +1,71 @@ +VERSION := 0.6.3 + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# collect sources +ifneq ($(AMALGAMATED),1) + SRC := $(wildcard lib/src/*.c) + # do not double-include amalgamation + SRC := $(filter-out lib/src/lib.c,$(SRC)) +else + # use amalgamated build + SRC := lib/src/lib.c +endif +OBJ := $(SRC:.c=.o) + +# define default flags, and override to append mandatory flags +CFLAGS ?= -O3 -Wall -Wextra -Werror +override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include + +# ABI versioning +SONAME_MAJOR := 0 +SONAME_MINOR := 0 + +# OS-specific bits +ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SONAME_MAJOR).dylib +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) +endif +ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly)) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: libtree-sitter.a libtree-sitter.$(SOEXTVER) + +libtree-sitter.a: $(OBJ) + $(AR) rcs $@ $^ + +libtree-sitter.$(SOEXTVER): $(OBJ) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ + ln -sf $@ libtree-sitter.$(SOEXT) + ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) + +install: all + install -d '$(DESTDIR)$(LIBDIR)' + install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + ln -sf libtree-sitter.$(SOEXTVER) 
'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) + ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter + install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ + install -d '$(DESTDIR)$(PCLIBDIR)' + sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' \ + tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +clean: + rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) + +.PHONY: all install clean diff --git a/README.md b/README.md index e5e0ba96e6..864c195abb 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: -* **General** enough to parse any programming language -* **Fast** enough to parse on every keystroke in a text editor -* **Robust** enough to provide useful results even in the presence of syntax errors -* **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application +- **General** enough to parse any programming language +- **Fast** enough to parse on every keystroke in a text editor +- **Robust** enough to provide useful results even in the presence of syntax errors +- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application [Documentation](https://tree-sitter.github.io/tree-sitter/) @@ -50,3 +50,9 @@ This is a partial wrapper. Some things still to be done: * Logging * DOT graphs +## Links + +- [Documentation](https://tree-sitter.github.io) +- [Rust binding](lib/binding_rust/README.md) +- [WASM binding](lib/binding_web/README.md) +- [Command-line interface](cli/README.md) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 27706945a2..47becc3dff 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.16.5" +version = "0.18.0" authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"] edition = "2018" license = "MIT" @@ -20,7 +20,8 @@ harness = false [dependencies] ansi_term = "0.11" -cc = "1.0" +cc = "^1.0.58" +atty = "0.2" clap = "2.32" difference = "2.0" dirs = "2.0.2" @@ -35,13 +36,14 @@ serde_derive = "1.0" smallbitvec = "2.3.0" tiny_http = "0.6" webbrowser = "0.5.1" +html-escape = "0.2.6" [dependencies.tree-sitter] -version = ">= 0.3.7" +version = ">= 0.17.0" path = "../lib" [dependencies.tree-sitter-highlight] -version = ">= 0.1.0" +version = ">= 0.3.0" path = "../highlight" [dependencies.tree-sitter-tags] diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index 50ee5370e1..53ab3fea23 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -2,8 +2,8 @@ use lazy_static::lazy_static; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::Instant; -use std::{env, fs, usize}; -use tree_sitter::{Language, Parser}; +use std::{env, fs, str, usize}; +use tree_sitter::{Language, Parser, Query}; use tree_sitter_cli::error::Error; use tree_sitter_cli::loader::Loader; @@ -18,26 +18,33 @@ lazy_static!
{ .map(|s| usize::from_str_radix(&s, 10).unwrap()) .unwrap_or(5); static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone()); - static ref EXAMPLE_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, Vec<PathBuf>> = { - fn process_dir(result: &mut BTreeMap<PathBuf, Vec<PathBuf>>, dir: &Path) { if dir.join("grammar.js").exists() { let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); + static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)> = { + fn process_dir(result: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) { if dir.join("grammar.js").exists() { let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); + let (example_paths, query_paths) = + result.entry(relative_path.to_owned()).or_default(); + if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { - result.insert( - relative_path.to_owned(), - example_files - .filter_map(|p| { - let p = p.unwrap().path(); - if p.is_file() { - Some(p) - } else { - None - } - }) - .collect(), - ); - } else { - result.insert(relative_path.to_owned(), Vec::new()); + example_paths.extend(example_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); + } + + if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + query_paths.extend(query_files.filter_map(|p| { + let p = p.unwrap().path(); + if p.is_file() { + Some(p.to_owned()) + } else { + None + } + })); } } else { for entry in fs::read_dir(&dir).unwrap() { @@ -56,20 +63,25 @@ lazy_static! { } fn main() { - let mut parser = Parser::new(); - let max_path_length = EXAMPLE_PATHS_BY_LANGUAGE_DIR - .iter() - .flat_map(|(_, paths)| paths.iter()) - .map(|p| p.file_name().unwrap().to_str().unwrap().chars().count()) + let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR + .values() + .flat_map(|(e, q)| { + e.iter() + .chain(q.iter()) + .map(|s| s.file_name().unwrap().to_str().unwrap().len()) + }) .max() - .unwrap(); + .unwrap_or(0); + eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); + + let mut parser = Parser::new(); let mut all_normal_speeds = Vec::new(); let mut all_error_speeds = Vec::new(); - eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT); - - for (language_path, example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (language_path, (example_paths, query_paths)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { let language_name = language_path.file_name().unwrap().to_str().unwrap(); if let Some(filter) = LANGUAGE_FILTER.as_ref() { @@ -79,9 +91,24 @@ fn main() { } eprintln!("\nLanguage: {}", language_name); - parser.set_language(get_language(language_path)).unwrap(); + let language = get_language(language_path); + parser.set_language(language).unwrap(); + + eprintln!(" Constructing Queries"); + for path in query_paths { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !path.to_str().unwrap().contains(filter.as_str()) { + continue; + } + } + + parse(&path, max_path_length, |source| { + Query::new(language, str::from_utf8(source).unwrap()) + .expect("Failed to parse query"); + }); + } - eprintln!(" Normal examples:"); + eprintln!(" Parsing Valid Code:"); let mut normal_speeds = Vec::new(); for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -90,12 +117,16 @@ fn main() { } } - normal_speeds.push(parse(&mut parser, example_path, max_path_length)); + normal_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to parse"); + })); } - eprintln!(" Error examples (mismatched languages):"); + eprintln!(" Parsing Invalid Code (mismatched languages):"); let mut error_speeds = Vec::new(); - for (other_language_path, 
example_paths) in EXAMPLE_PATHS_BY_LANGUAGE_DIR.iter() { + for (other_language_path, (example_paths, _)) in + EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() + { if other_language_path != language_path { for example_path in example_paths { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -104,7 +135,9 @@ fn main() { } } - error_speeds.push(parse(&mut parser, example_path, max_path_length)); + error_speeds.push(parse(example_path, max_path_length, |code| { + parser.parse(code, None).expect("Failed to parse"); + })); } } } @@ -123,7 +156,7 @@ fn main() { all_error_speeds.extend(error_speeds); } - eprintln!("\nOverall"); + eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); @@ -151,28 +184,25 @@ fn aggregate(speeds: &Vec<usize>) -> Option<(usize, usize)> { Some((total / speeds.len(), max)) } -fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> usize { +fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize { eprint!( " {:width$}\t", - example_path.file_name().unwrap().to_str().unwrap(), + path.file_name().unwrap().to_str().unwrap(), width = max_path_length ); - let source_code = fs::read(example_path) - .map_err(Error::wrap(|| format!("Failed to read {:?}", example_path))) + let source_code = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read {:?}", path))) .unwrap(); let time = Instant::now(); for _ in 0..*REPETITION_COUNT { - parser - .parse(&source_code, None) - .expect("Incompatible language version"); + action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = - duration.as_secs() as f64 * 1000.0 + duration.subsec_nanos() as f64 / 1000000.0; - let speed = (source_code.len() as f64 / duration_ms) as usize; + let duration_ms = duration.as_millis(); + let speed = source_code.len() as u128 / (duration_ms + 1); eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); - speed + speed as usize } fn get_language(path: &Path) -> Language { diff --git a/cli/build.rs b/cli/build.rs index 0ed9ef067d..47506018a0 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { @@ -6,12 +6,25 @@ fn main() { println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); } + if wasm_files_present() { + println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); + } + println!( "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); } +fn wasm_files_present() -> bool { + let paths = [ + "../lib/binding_web/tree-sitter.js", + "../lib/binding_web/tree-sitter.wasm", + ]; + + paths.iter().all(|p| Path::new(p).exists()) +} + fn read_git_sha() -> Option<String> { let mut repo_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -51,7 +64,6 @@ fn read_git_sha() -> Option<String> { } return fs::read_to_string(&ref_filename).ok(); } - // If we're on a detached commit, then the `HEAD` file itself contains the sha. 
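+    // If we're on a detached commit, then `HEAD` itself contains the sha (40 hex characters).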
else if head_content.len() == 40 { return Some(head_content); diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts new file mode 100644 index 0000000000..b9bf1c9814 --- /dev/null +++ b/cli/npm/dsl.d.ts @@ -0,0 +1,356 @@ +type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; +type BlankRule = {type: 'BLANK'}; +type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; +type FieldRule = {type: 'FIELD'; name: string; content: Rule}; +type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; +type PatternRule = {type: 'PATTERN'; value: string}; +type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; +type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; +type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; +type PrecRule = {type: 'PREC'; content: Rule; value: number}; +type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; +type RepeatRule = {type: 'REPEAT'; content: Rule}; +type SeqRule = {type: 'SEQ'; members: Rule[]}; +type StringRule = {type: 'STRING'; value: string}; +type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name}; +type TokenRule = {type: 'TOKEN'; content: Rule}; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule<string> + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols<RuleName extends string> = { + [name in RuleName]: SymbolRule<name>; +} & + Record<string, SymbolRule<string>>; + +type RuleBuilder<RuleName extends string> = ( + $: GrammarSymbols<RuleName>, +) => RuleOrLiteral; + +type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>; +}; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. + */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. + * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols<RuleName>, + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record<string, SymbolRule<string>>, + previous: Rule[], + ) => SymbolRule<string>[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar.
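+ * For example, a grammar that also skips line comments might use (rule name illustrative): extras: $ => [/\s/, $.comment].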
+ * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols<RuleName>, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. + * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols<RuleName>, + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols<RuleName>, + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. + * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols<RuleName>) => RuleOrLiteral; +} + +type GrammarSchema<RuleName extends string> = { + [K in keyof Grammar<RuleName>]: K extends 'rules' + ? Record<RuleName, Rule> + : Grammar<RuleName>[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule<string>, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. + * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. + * + * @param rule rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at parser-generation time. When two rules overlap + * in a way that represents either a true ambiguity or a _local_ ambiguity + * given one token of lookahead, Tree-sitter will try to resolve the conflict by + * matching the rule with the higher precedence. The default precedence of all + * rules is zero. This works similarly to the precedence directives in Yacc grammars.
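+ * For example (rule names illustrative), prec(1, seq($.expression, '*', $.expression)) gives this production priority over overlapping productions that keep the default precedence of zero.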
+ * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (number: number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(number: number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param number (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(number: number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param number precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(number: number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. 
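+ * For example (rule names illustrative), a simple declaration could be written as seq($.type, $.identifier, ';').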
+ * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym<Name extends string>(name: Name): SymbolRule<Name>; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. + * + * @param rule rule to represent as a single token + */ +declare const token: { + (rule: RuleOrLiteral): TokenRule; + + /** + * Marks the given rule as producing an immediate token. This allows + * the parser to produce a different token based on whether or not + * there are `extras` preceding the token's main content. When there + * are _no_ leading `extras`, an immediate token is preferred over a + * normal token which would otherwise match. + * + * @param rule rule to represent as an immediate token + */ + immediate(rule: RuleOrLiteral): ImmediateTokenRule; +}; + +/** + * Creates a new language grammar with the provided schema. + * + * @param options grammar options + */ +declare function grammar<RuleName extends string>( + options: Grammar<RuleName>, +): GrammarSchema<RuleName>; + +/** + * Extends an existing language grammar with the provided options, + * creating a new language. + * + * @param baseGrammar base grammar schema to extend from + * @param options grammar options for the new extended language + */ +declare function grammar< + BaseGrammarRuleName extends string, + RuleName extends string +>( + baseGrammar: GrammarSchema<BaseGrammarRuleName>, + options: Grammar<RuleName, BaseGrammarRuleName>, +): GrammarSchema<RuleName | BaseGrammarRuleName>; diff --git a/cli/npm/package.json b/cli/npm/package.json index ad46e20d70..85cf5da678 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.16.5", + "version": "0.18.0", "author": "Max Brunsfeld", "license": "MIT", "repository": { diff --git a/cli/src/error.rs b/cli/src/error.rs index 824bd92fab..63b57c9e42 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,7 +1,7 @@ use super::test_highlight; use std::fmt::Write; use std::io; -use tree_sitter::QueryError; +use tree_sitter::{QueryError, QueryErrorKind}; #[derive(Debug)] pub struct Error(pub Vec<String>); @@ -51,27 +51,19 @@ impl Error { } } -impl<'a> From<QueryError> for Error { - fn from(error: QueryError) -> Self { - match error { - QueryError::Capture(row, c) => Error::new(format!( - "Query error on line {}: Invalid capture name {}", - row, c - )), - QueryError::Field(row, f) => Error::new(format!( - "Query error on line {}: Invalid field name {}", - row, f - )), - QueryError::NodeType(row, t) => Error::new(format!( - "Query error on line {}. Invalid node type {}", - row, t - )), - QueryError::Syntax(row, l) => Error::new(format!( - "Query error on line {}. Invalid syntax:\n{}", - row, l - )), - QueryError::Predicate(p) => Error::new(format!("Query error: {}", p)), +impl<'a> From<(&str, QueryError)> for Error { + fn from((path, error): (&str, QueryError)) -> Self { + let mut msg = format!("Query error at {}:{}. 
", path, error.row + 1); + match error.kind { + QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message), + QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message), + QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message), + QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message), + QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message), + QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message), } + .unwrap(); + Self::new(msg) } } @@ -83,7 +75,7 @@ impl<'a> From<io::Error> for Error { impl<'a> From<tree_sitter_tags::Error> for Error { fn from(error: tree_sitter_tags::Error) -> Self { - Error::new(format!("{:?}", error)) + Error::new(format!("{}", error)) } } diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5d8f7f0fb2..d159a2c4fc 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -68,6 +68,7 @@ impl<'a> Minimizer<'a> { .. } => { if !self.simple_aliases.contains_key(&symbol) + && !self.syntax_grammar.supertype_symbols.contains(&symbol) && !aliased_symbols.contains(&symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named @@ -199,6 +200,9 @@ impl<'a> Minimizer<'a> { right_state: &ParseState, group_ids_by_state_id: &Vec<usize>, ) -> bool { + if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra { + return true; + } for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { if self.entries_conflict( diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 55594871e8..62fb1d70e8 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -292,7 +292,12 @@ function grammar(baseGrammar, options) { extras = options.extras .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); + + if (!Array.isArray(extras)) { + throw new Error("Grammar's 'extras' function must return an array.") + } + + extras = extras.map(normalize); } let word = baseGrammar.word; diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 12a59e1bb4..830c4a65b4 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -31,16 +31,9 @@ lazy_static!
{ .unwrap(); } -const NEW_HEADER_PARTS: [&'static str; 2] = [ - " - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSSymbol *public_symbol_map;", - " -#define SMALL_STATE(id) id - LARGE_STATE_COUNT -", -]; +const NEW_HEADER_PARTS: &[&'static str] = &[" + const uint16_t *alias_map; + uint32_t state_count;"]; struct GeneratedParser { c_code: String, @@ -101,7 +94,7 @@ pub fn generate_parser_in_directory( } else { let mut header = tree_sitter::PARSER_HEADER.to_string(); - for part in &NEW_HEADER_PARTS { + for part in NEW_HEADER_PARTS.iter() { let pos = header .find(part) .expect("Missing expected part of parser.h header"); diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index abab873964..4cbfaaa325 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -1,8 +1,10 @@ use std::char; use std::cmp::max; use std::cmp::Ordering; +use std::collections::HashSet; use std::fmt; use std::mem::swap; +use std::ops::Range; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum CharacterSet { @@ -178,6 +180,40 @@ impl CharacterSet { } } + pub fn ranges<'a>( + chars: &'a Vec<char>, + ruled_out_characters: &'a HashSet<u32>, + ) -> impl Iterator<Item = Range<char>> + 'a { + let mut prev_range: Option<Range<char>> = None; + chars + .iter() + .map(|c| (*c, false)) + .chain(Some(('\0', true))) + .filter_map(move |(c, done)| { + if done { + return prev_range.clone(); + } + if ruled_out_characters.contains(&(c as u32)) { + return None; + } + if let Some(range) = prev_range.clone() { + let mut prev_range_successor = range.end as u32 + 1; + while prev_range_successor < c as u32 { + if !ruled_out_characters.contains(&prev_range_successor) { + prev_range = Some(c..c); + return Some(range); + } + prev_range_successor += 1; + } + prev_range = Some(range.start..c); + None + } else { + prev_range = Some(c..c); + None + } + }) + } + #[cfg(test)] pub fn contains(&self, c: char) -> bool { match self { @@ -725,7 +761,7 @@ mod tests { .add_range('d', 'e') ); - // A whitelist and an intersecting blacklist. + // An inclusion and an intersecting exclusion. // Both sets contain 'e', 'f', and 'm' let mut a = CharacterSet::empty() .add_range('c', 'h') @@ -755,7 +791,7 @@ assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - // A blacklist and an overlapping blacklist. + // An exclusion and an overlapping exclusion. // Both sets exclude 'c', 'd', and 'e' let mut a = CharacterSet::empty().add_range('a', 'e').negate(); let mut b = CharacterSet::empty().add_range('c', 'h').negate(); @@ -766,7 +802,7 @@ assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); - // A blacklist and a larger blacklist. + // An exclusion and a larger exclusion. 
let mut a = CharacterSet::empty().add_range('b', 'c').negate(); let mut b = CharacterSet::empty().add_range('a', 'd').negate(); assert_eq!( @@ -825,4 +861,45 @@ assert!(a.does_intersect(&b)); assert!(b.does_intersect(&a)); } + + #[test] + fn test_character_set_get_ranges() { + struct Row { + chars: Vec<char>, + ruled_out_chars: Vec<char>, + expected_ranges: Vec<Range<char>>, + } + + let table = [ + Row { + chars: vec!['a'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'a'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'z'], + ruled_out_chars: vec![], + expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], + }, + Row { + chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], + ruled_out_chars: vec!['d', 'f', 'g'], + expected_ranges: vec!['a'..'h', 'z'..'z'], + }, + ]; + + for Row { + chars, + ruled_out_chars, + expected_ranges, + } in table.iter() + { + let ruled_out_chars = ruled_out_chars + .into_iter() + .map(|c: &char| *c as u32) + .collect(); + let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>(); + assert_eq!(ranges, *expected_ranges); + } + } } diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index b5bf1515ba..bc5a836fab 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -19,7 +19,7 @@ pub(crate) struct FieldInfo { #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap<String, FieldInfo>, - pub child_types: Vec<ChildType>, + pub children: FieldInfo, pub children_without_fields: FieldInfo, pub has_multi_step_production: bool, } @@ -70,7 +70,7 @@ impl Default for FieldInfoJSON { impl Default for ChildQuantity { fn default() -> Self { - Self::zero() + Self::one() } } @@ -146,7 +146,7 @@ impl ChildQuantity { pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, ) -> Result<Vec<VariableInfo>> { let child_type_is_visible = |t: &ChildType| { variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous @@ -158,7 +158,7 @@ pub(crate) fn get_variable_info( // Each variable's summary can depend on the summaries of other hidden variables, // and variables can have mutually recursive structure. So we compute the summaries - // iteratively, in a loop that terminates only when more changes are possible. + // iteratively, in a loop that terminates only when no more changes are possible. let mut did_change = true; let mut all_initialized = false; let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; @@ -168,13 +168,14 @@ for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut variable_info = result[i].clone(); - // Within a variable, consider each production separately. For each - // production, determine which children and fields can occur, and how many - // times they can occur. - for (production_index, production) in variable.productions.iter().enumerate() { - let mut field_quantities = HashMap::new(); - let mut children_without_fields_quantity = ChildQuantity::zero(); - let mut has_uninitialized_invisible_children = false; + // Examine each of the variable's productions. The variable's child types can be + // immediately combined across all productions, but the child quantities must be + // recorded separately for each production. 
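+            // (The per-production quantities computed below are merged into the variable-level summary only once the entire production has been scanned.)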
+ for production in &variable.productions { + let mut production_field_quantities = HashMap::new(); + let mut production_children_quantity = ChildQuantity::zero(); + let mut production_children_without_fields_quantity = ChildQuantity::zero(); + let mut production_has_uninitialized_invisible_children = false; if production.steps.len() > 1 { variable_info.has_multi_step_production = true; @@ -184,117 +185,103 @@ pub(crate) fn get_variable_info( let child_symbol = step.symbol; let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) - } else if let Some(alias) = simple_aliases.get(&step.symbol) { + } else if let Some(alias) = default_aliases.get(&step.symbol) { ChildType::Aliased(alias.clone()) } else { ChildType::Normal(child_symbol) }; - // Record all of the types of direct children. - did_change |= sorted_vec_insert(&mut variable_info.child_types, &child_type); + let child_is_hidden = !child_type_is_visible(&child_type) + && !syntax_grammar.supertype_symbols.contains(&child_symbol); - // Record all of the field names that occur. + // Maintain the set of all child types for this variable, and the quantity of + // visible children in this production. + did_change |= + extend_sorted(&mut variable_info.children.types, Some(&child_type)); + if !child_is_hidden { + production_children_quantity.append(ChildQuantity::one()); + } + + // Maintain the set of child types associated with each field, and the quantity + // of children associated with each field in this production. if let Some(field_name) = &step.field_name { - // Record how many times each field occurs in this production. - field_quantities + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()); + did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); + + let production_field_quantity = production_field_quantities .entry(field_name) - .or_insert(ChildQuantity::zero()) - .append(ChildQuantity::one()); - - // Record the types of children for this field. - let field_info = - variable_info.fields.entry(field_name.clone()).or_insert({ - let mut info = FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }; - - // If this field did *not* occur in an earlier production, - // then it is not required. - if production_index > 0 { - info.quantity.required = false; - } - info - }); - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); + .or_insert(ChildQuantity::zero()); + + // Inherit the types and quantities of hidden children associated with fields. + if child_is_hidden && child_symbol.is_non_terminal() { + let child_variable_info = &result[child_symbol.index]; + did_change |= extend_sorted( + &mut field_info.types, + &child_variable_info.children.types, + ); + production_field_quantity.append(child_variable_info.children.quantity); + } else { + production_field_quantity.append(ChildQuantity::one()); + } } - // Record named children without fields. + // Maintain the set of named children without fields within this variable. else if child_type_is_named(&child_type) { - // Record how many named children without fields occur in this production. - children_without_fields_quantity.append(ChildQuantity::one()); - - // Record the types of all of the named children without fields. 
- let children_info = &mut variable_info.children_without_fields; - if children_info.types.is_empty() { - children_info.quantity = ChildQuantity::one(); - } - did_change |= sorted_vec_insert(&mut children_info.types, &child_type); + production_children_without_fields_quantity.append(ChildQuantity::one()); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + Some(&child_type), + ); } - // Inherit information from any hidden children. - if child_symbol.is_non_terminal() - && !syntax_grammar.supertype_symbols.contains(&child_symbol) - && step.alias.is_none() - && !child_type_is_visible(&child_type) - { + // Inherit all child information from hidden children. + if child_is_hidden && child_symbol.is_non_terminal() { let child_variable_info = &result[child_symbol.index]; - // If a hidden child can have multiple children, then this - // node can appear to have multiple children. + // If a hidden child can have multiple children, then its parent node can + // appear to have multiple children. if child_variable_info.has_multi_step_production { variable_info.has_multi_step_production = true; } - // Inherit fields from this hidden child + // If a hidden child has fields, then the parent node can appear to have + // those same fields. for (field_name, child_field_info) in &child_variable_info.fields { - field_quantities + production_field_quantities .entry(field_name) .or_insert(ChildQuantity::zero()) .append(child_field_info.quantity); - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert(FieldInfo { - types: Vec::new(), - quantity: ChildQuantity::one(), - }); - for child_type in &child_field_info.types { - sorted_vec_insert(&mut field_info.types, &child_type); - } + did_change |= extend_sorted( + &mut variable_info + .fields + .entry(field_name.clone()) + .or_insert(FieldInfo::default()) + .types, + &child_field_info.types, + ); } - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - did_change |= - sorted_vec_insert(&mut variable_info.child_types, child_type); - } + // If a hidden child has children, then the parent node can appear to have + // those same children. + production_children_quantity.append(child_variable_info.children.quantity); + did_change |= extend_sorted( + &mut variable_info.children.types, + &child_variable_info.children.types, + ); - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - did_change |= sorted_vec_insert(&mut field_info.types, &child_type); - } - } - // Inherit info about children without fields from this hidden child. - else { + // If a hidden child can have named children without fields, then the parent + // node can appear to have those same children. 
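+                    // (The hidden child's own fields were already inherited just above.)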
+ if step.field_name.is_none() { let grandchildren_info = &child_variable_info.children_without_fields; if !grandchildren_info.types.is_empty() { - children_without_fields_quantity - .append(grandchildren_info.quantity); - - if variable_info.children_without_fields.types.is_empty() { - variable_info.children_without_fields.quantity = - ChildQuantity::one(); - } - - for child_type in &grandchildren_info.types { - did_change |= sorted_vec_insert( - &mut variable_info.children_without_fields.types, - &child_type, - ); - } + production_children_without_fields_quantity + .append(child_variable_info.children_without_fields.quantity); + did_change |= extend_sorted( + &mut variable_info.children_without_fields.types, + &child_variable_info.children_without_fields.types, + ); } } } @@ -302,22 +289,27 @@ pub(crate) fn get_variable_info( // Note whether or not this production contains children whose summaries // have not yet been computed. if child_symbol.index >= i && !all_initialized { - has_uninitialized_invisible_children = true; + production_has_uninitialized_invisible_children = true; } } // If this production's children all have had their summaries initialized, // then expand the quantity information with all of the possibilities introduced // by this production. - if !has_uninitialized_invisible_children { + if !production_has_uninitialized_invisible_children { + did_change |= variable_info + .children + .quantity + .union(production_children_quantity); + did_change |= variable_info .children_without_fields .quantity - .union(children_without_fields_quantity); + .union(production_children_without_fields_quantity); for (field_name, info) in variable_info.fields.iter_mut() { did_change |= info.quantity.union( - field_quantities + production_field_quantities .get(field_name) .cloned() .unwrap_or(ChildQuantity::zero()), @@ -333,15 +325,8 @@ pub(crate) fn get_variable_info( } for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - if result[supertype_symbol.index].has_multi_step_production { + let variable = &syntax_grammar.variables[supertype_symbol.index]; return Err(Error::grammar(&format!( "Supertype symbols must always have a single visible child, but `{}` can have multiple", variable.name @@ -352,13 +337,15 @@ pub(crate) fn get_variable_info( // Update all of the node type lists to eliminate hidden nodes. 
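+    // (After this pass, supertype subtype lists contain only visible node types, and fields whose type lists become empty are dropped entirely.)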
for supertype_symbol in &syntax_grammar.supertype_symbols { result[supertype_symbol.index] - .child_types + .children + .types .retain(child_type_is_visible); } for variable_info in result.iter_mut() { for (_, field_info) in variable_info.fields.iter_mut() { field_info.types.retain(child_type_is_visible); } + variable_info.fields.retain(|_, v| !v.types.is_empty()); variable_info .children_without_fields .types .retain(child_type_is_visible); @@ -371,7 +358,7 @@ pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, + default_aliases: &AliasMap, variable_info: &Vec<VariableInfo>, ) -> Vec<NodeInfoJSON> { let mut node_types_json = BTreeMap::new(); @@ -382,7 +369,7 @@ named: alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(&symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, } @@ -430,22 +417,33 @@ }; let mut aliases_by_symbol = HashMap::new(); - for (symbol, alias) in simple_aliases { + for (symbol, alias) in default_aliases { aliases_by_symbol.insert(*symbol, { let mut aliases = HashSet::new(); aliases.insert(Some(alias.clone())); aliases }); } + for extra_symbol in &syntax_grammar.extra_symbols { + if !default_aliases.contains_key(extra_symbol) { + aliases_by_symbol + .entry(*extra_symbol) + .or_insert(HashSet::new()) + .insert(None); + } + } for variable in &syntax_grammar.variables { for production in &variable.productions { for step in &production.steps { - if !simple_aliases.contains_key(&step.symbol) { - aliases_by_symbol - .entry(step.symbol) - .or_insert(HashSet::new()) - .insert(step.alias.clone()); - } + aliases_by_symbol + .entry(step.symbol) + .or_insert(HashSet::new()) + .insert( + step.alias + .as_ref() + .or_else(|| default_aliases.get(&step.symbol)) + .cloned(), + ); } } } @@ -467,7 +465,8 @@ subtypes: None, }); let mut subtypes = info - .child_types + .children + .types .iter() .map(child_type_to_node_type) .collect::<Vec<_>>(); @@ -502,26 +501,34 @@ // There may already be an entry with this name, because multiple // rules may be aliased with the same name. - let node_type_json = - node_types_json - .entry(kind.clone()) - .or_insert_with(|| NodeInfoJSON { - kind: kind.clone(), - named: is_named, - fields: Some(BTreeMap::new()), - children: None, - subtypes: None, - }); + let mut node_type_existed = true; + let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| { + node_type_existed = false; + NodeInfoJSON { + kind: kind.clone(), + named: is_named, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None, + } + }); let fields_json = node_type_json.fields.as_mut().unwrap(); - for (field, field_info) in info.fields.iter() { - populate_field_info_json( - &mut fields_json - .entry(field.clone()) - .or_insert(FieldInfoJSON::default()), - field_info, - ); + for (new_field, field_info) in info.fields.iter() { + let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { + // If another rule is aliased with the same name, and does *not* have this field, + // then this field cannot be required. 
+ let mut field_json = FieldInfoJSON::default(); + if node_type_existed { + field_json.required = false; + } + field_json + }); + populate_field_info_json(field_json, field_info); } + + // If another rule is aliased with the same name, any fields that aren't present in this rule + // cannot be required. for (existing_field, field_json) in fields_json.iter_mut() { if !info.fields.contains_key(existing_field) { field_json.required = false; @@ -678,16 +685,19 @@ fn variable_type_for_child_type( } } -fn sorted_vec_insert<T>(vec: &mut Vec<T>, value: &T) -> bool +fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool where T: Clone + Eq + Ord, + T: 'a, { - if let Err(i) = vec.binary_search(&value) { - vec.insert(i, value.clone()); - true - } else { - false - } + values.into_iter().any(|value| { + if let Err(i) = vec.binary_search(&value) { + vec.insert(i, value.clone()); + true + } else { + false + } + }) } #[cfg(test)] @@ -723,9 +733,18 @@ mod tests { kind: VariableType::Named, rule: Rule::string("x"), }, + // This rule is not reachable from the start symbol + // so it won't be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, ], }); + assert_eq!(node_types.len(), 3); + assert_eq!( node_types[0], NodeInfoJSON { @@ -785,6 +804,112 @@ ); } + #[test] + fn test_node_types_simple_extras() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_symbols: vec![Rule::named("v3")], + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + // This rule is not reachable from the start symbol, but + // it is reachable from the 'extra_symbols' so it + // should be present in the node_types + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!(node_types.len(), 4); + + assert_eq!( + node_types[0], + NodeInfoJSON { + kind: "v1".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: ";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + assert_eq!( + node_types[3], + NodeInfoJSON { + kind: "v3".to_string(), + named: true, + subtypes: None, + children: None, + fields: None + } + ); + } + #[test] fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { @@ -1170,7 +1295,39 @@ } #[test] - fn test_node_types_with_named_aliases() { + fn test_node_types_with_fields_on_hidden_tokens() { + let node_types = get_node_types(InputGrammar { 
name: String::new(), + extra_symbols: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![Variable { + name: "script".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("a".to_string(), Rule::pattern("hi")), + Rule::field("b".to_string(), Rule::pattern("bye")), + ]), + }], + }); + + assert_eq!( + node_types, + [NodeInfoJSON { + kind: "script".to_string(), + named: true, + fields: Some(BTreeMap::new()), + children: None, + subtypes: None + }] + ); + } + + #[test] + fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { name: String::new(), extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), word_token: None, supertype_symbols: vec![], variables: vec![ Variable { - name: "expression".to_string(), - kind: VariableType::Named, - rule: Rule::choice(vec![Rule::named("yield"), Rule::named("argument_list")]), - }, - Variable { - name: "yield".to_string(), - kind: VariableType::Named, - rule: Rule::Seq(vec![Rule::string("YIELD")]), - }, - Variable { - name: "argument_list".to_string(), + name: "script".to_string(), kind: VariableType::Named, rule: Rule::choice(vec![ - Rule::named("x"), - Rule::alias(Rule::named("b"), "expression".to_string(), true), + Rule::named("a"), + // Rule `b` is aliased as rule `a` + Rule::alias(Rule::named("b"), "a".to_string(), true), ]), }, Variable { - name: "b".to_string(), + name: "a".to_string(), kind: VariableType::Named, - rule: Rule::choice(vec![ - Rule::field("f".to_string(), Rule::string("B")), - Rule::named("c"), + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::string("1")), + Rule::field("f2".to_string(), Rule::string("2")), ]), }, Variable { - name: "c".to_string(), - kind: VariableType::Named, - rule: Rule::seq(vec![Rule::string("C")]), - }, - Variable { - name: "x".to_string(), + name: "b".to_string(), kind: VariableType::Named, - rule: Rule::seq(vec![Rule::string("X")]), + rule: Rule::seq(vec![ + Rule::field("f2".to_string(), Rule::string("22")), + Rule::field("f2".to_string(), Rule::string("222")), + Rule::field("f3".to_string(), Rule::string("3")), + ]), }, ], }); assert_eq!( - node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(), - &[ - "argument_list", - "c", - "expression", - "x", - "yield", - "B", - "C", - "X", - "YIELD" - ] + &node_types + .iter() + .map(|t| t.kind.as_str()) + .collect::<Vec<_>>(), + &["a", "script", "1", "2", "22", "222", "3"] ); + assert_eq!( - node_types[2], - NodeInfoJSON { - kind: "expression".to_string(), - named: true, - subtypes: None, - children: Some(FieldInfoJSON { - multiple: false, - required: false, - types: vec![ - NodeTypeJSON { - kind: "argument_list".to_string(), - named: true, - }, - NodeTypeJSON { - kind: "c".to_string(), - named: true, - }, - NodeTypeJSON { - kind: "yield".to_string(), + &node_types[0..2], + &[ + // A combination of the types for `a` and `b`. 
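+                // (`f2` is required and multiple because both aliased rules produce it and rule `b` repeats it; `f1` and `f3` each come from only one of the two rules, so they are optional.)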
+ NodeInfoJSON { + kind: "a".to_string(), + named: true, + subtypes: None, + children: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "1".to_string(), + named: false, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: true, + required: true, + types: vec![ + NodeTypeJSON { + kind: "2".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "22".to_string(), + named: false, + }, + NodeTypeJSON { + kind: "222".to_string(), + named: false, + } + ] + }, + ), + ( + "f3".to_string(), + FieldInfoJSON { + multiple: false, + required: false, + types: vec![NodeTypeJSON { + kind: "3".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ), + }, + NodeInfoJSON { + kind: "script".to_string(), + named: true, + subtypes: None, + // Only one node + children: Some(FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "a".to_string(), named: true, - }, - ] - }), - fields: Some( - vec![( - "f".to_string(), - FieldInfoJSON { - required: false, - multiple: false, - types: vec![NodeTypeJSON { - named: false, - kind: "B".to_string(), - }] - } - )] - .into_iter() - .collect() - ), - } + }] + }), + fields: Some(BTreeMap::new()), + } + ] ); } @@ -1434,6 +1610,71 @@ mod tests { ); } + #[test] + fn test_get_variable_info_with_repetitions_inside_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Field associated with a repetition. + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1")], + }, + Production { + dynamic_precedence: 0, + steps: vec![], + }, + ], + }, + // Repetition node + SyntaxVariable { + name: "_rule0_repeat".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::non_terminal(1)), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + &AliasMap::new(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + quantity: ChildQuantity { + exists: true, + required: false, + multiple: true, + }, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::<HashMap<_, _>>() + ); + } + #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( @@ -1570,14 +1811,14 @@ } fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> { - let (syntax_grammar, lexical_grammar, _, simple_aliases) = + let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(&grammar).unwrap(); let variable_info = - get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap(); + get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( &syntax_grammar, &lexical_grammar, - &simple_aliases, + &default_aliases, &variable_info, ) } diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 2b88762bd8..9b594f3caa 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -12,7 +12,7 @@ use 
std::i32; lazy_static! { static ref CURLY_BRACE_REGEX: Regex = - Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-F,}][^}]*)\}"#).unwrap(); + Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap(); } const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; @@ -653,12 +653,15 @@ mod tests { Rule::pattern(r#"\{[ab]{3}\}"#), // Unicode codepoints Rule::pattern(r#"\u{1000A}"#), + // Unicode codepoints (lowercase) + Rule::pattern(r#"\u{1000b}"#), ], separators: vec![], examples: vec![ ("u{1234} ok", Some((0, "u{1234}"))), ("{aba}}", Some((1, "{aba}"))), ("\u{1000A}", Some((2, "\u{1000A}"))), + ("\u{1000b}", Some((3, "\u{1000b}"))), ], }, ]; diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs new file mode 100644 index 0000000000..3e08e3adbe --- /dev/null +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -0,0 +1,293 @@ +use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; +use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; + +#[derive(Clone, Default)] +struct SymbolStatus { + aliases: Vec<(Alias, usize)>, + appears_unaliased: bool, +} + +// Update the grammar by finding symbols that always are aliased, and for each such symbol, +// promoting one of its aliases to a "default alias", which is applied globally instead +// of in a context-specific way. +// +// This has two benefits: +// * It reduces the overhead of storing production-specific alias info in the parse table. +// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation +// ensures that the children of an `ERROR` node have symbols that are consistent with the +// way that they would appear in a valid syntax tree. +pub(super) fn extract_default_aliases( + syntax_grammar: &mut SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> AliasMap { + let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; + let mut non_terminal_status_list = + vec![SymbolStatus::default(); syntax_grammar.variables.len()]; + let mut external_status_list = + vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; + + // For each grammar symbol, find all of the aliases under which the symbol appears, + // and determine whether or not the symbol ever appears *unaliased*. + for variable in syntax_grammar.variables.iter() { + for production in variable.productions.iter() { + for step in production.steps.iter() { + let mut status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // Default aliases don't work for inlined variables. 
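+                // (Inlined rules are removed from the grammar entirely, so there is no node left for a default alias to rename.)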
+ if syntax_grammar.variables_to_inline.contains(&step.symbol) { + continue; + } + + if let Some(alias) = &step.alias { + if let Some(count_for_alias) = status + .aliases + .iter_mut() + .find_map(|(a, count)| if a == alias { Some(count) } else { None }) + { + *count_for_alias += 1; + } else { + status.aliases.push((alias.clone(), 1)); + } + } else { + status.appears_unaliased = true; + } + } + } + } + + let symbols_with_statuses = (terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::terminal(i), status))) + .chain( + non_terminal_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::non_terminal(i), status)), + ) + .chain( + external_status_list + .iter_mut() + .enumerate() + .map(|(i, status)| (Symbol::external(i), status)), + ); + + // For each symbol that always appears aliased, find the alias that occurs most often, + // and designate that alias as the symbol's "default alias". Store all of these + // default aliases in a map that will be returned. + let mut result = AliasMap::new(); + for (symbol, status) in symbols_with_statuses { + if status.appears_unaliased { + status.aliases.clear(); + } else { + if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); + } + } + } + + // Wherever a symbol is aliased as its default alias, remove the usage of the alias, + // because it will now be redundant. + let mut alias_positions_to_clear = Vec::new(); + for variable in syntax_grammar.variables.iter_mut() { + alias_positions_to_clear.clear(); + + for (i, production) in variable.productions.iter().enumerate() { + for (j, step) in production.steps.iter().enumerate() { + let status = match step.symbol.kind { + SymbolType::External => &mut external_status_list[step.symbol.index], + SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], + SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], + SymbolType::End => panic!("Unexpected end token"), + }; + + // If this step is aliased as the symbol's default alias, then remove that alias. 
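The `max_by_key` selection above encodes the promotion rule: the highest occurrence count wins, and a tie goes to the alias that was recorded first. Since `max_by_key` keeps the *last* maximum, the key negates the index to prefer the earliest entry. A tiny sketch of just that selection; `pick_default` is a hypothetical helper over `(alias, count)` pairs, not a function in this file.

```rust
// Among a symbol's observed aliases, pick the most frequent one; on a tie,
// prefer the alias seen first. Mirrors the key `(count, -(i as i64))` above.
fn pick_default<'a>(aliases: &[(&'a str, usize)]) -> Option<&'a str> {
    aliases
        .iter()
        .enumerate()
        .max_by_key(|(i, (_, count))| (*count, -(*i as i64)))
        .map(|(_, (alias, _))| *alias)
}

fn main() {
    // "a6" wins on count; "a3" wins a 1-1 tie with "a5" because it came first.
    assert_eq!(pick_default(&[("a4", 1), ("a6", 2)]), Some("a6"));
    assert_eq!(pick_default(&[("a3", 1), ("a5", 1)]), Some("a3"));
    println!("ok");
}
```

Once an alias is promoted, per-step uses of that same alias become redundant, which is what the removal loop continuing below strips out.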
+ if step.alias.is_some() + && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + { + let mut other_productions_must_use_this_alias_at_this_index = false; + for (other_i, other_production) in variable.productions.iter().enumerate() { + if other_i != i + && other_production.steps.len() > j + && other_production.steps[j].alias == step.alias + && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() + { + other_productions_must_use_this_alias_at_this_index = true; + break; + } + } + + if !other_productions_must_use_this_alias_at_this_index { + alias_positions_to_clear.push((i, j)); + } + } + } + } + + for (production_index, step_index) in &alias_positions_to_clear { + variable.productions[*production_index].steps[*step_index].alias = None; + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + use crate::generate::nfa::Nfa; + + #[test] + fn test_extract_simple_aliases() { + let mut syntax_grammar = SyntaxGrammar { + variables: vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + }], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + // Token 0 is always aliased as "a1". + ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), + // Token 1 is aliased within rule `v1` above, but not here. + ProductionStep::new(Symbol::terminal(1)), + // Token 2 is aliased differently here than in `v1`. The alias from + // `v1` should be promoted to the default alias, because `v1` appears + // first in the grammar. + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + // Token 3 is also aliased differently here than in `v1`. In this case, + // this alias should be promoted to the default alias, because it is + // used a greater number of times (twice). 
+ ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), + ], + }], + }, + ], + extra_symbols: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), + external_tokens: Vec::new(), + word_token: None, + }; + + let lexical_grammar = LexicalGrammar { + nfa: Nfa::new(), + variables: vec![ + LexicalVariable { + name: "t0".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t1".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t2".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + LexicalVariable { + name: "t3".to_string(), + kind: VariableType::Anonymous, + implicit_precedence: 0, + start_state: 0, + }, + ], + }; + + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); + assert_eq!(default_aliases.len(), 3); + + assert_eq!( + default_aliases.get(&Symbol::terminal(0)), + Some(&Alias { + value: "a1".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(2)), + Some(&Alias { + value: "a3".to_string(), + is_named: true, + }) + ); + assert_eq!( + default_aliases.get(&Symbol::terminal(3)), + Some(&Alias { + value: "a6".to_string(), + is_named: true, + }) + ); + assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); + + assert_eq!( + syntax_grammar.variables, + vec![ + SyntaxVariable { + name: "v1".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), + ], + },], + }, + SyntaxVariable { + name: "v2".to_owned(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(1)), + ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), + ProductionStep::new(Symbol::terminal(3)), + ProductionStep::new(Symbol::terminal(3)), + ], + },], + }, + ] + ); + } +} diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs deleted file mode 100644 index 6da009d56d..0000000000 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ /dev/null @@ -1,223 +0,0 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; - -#[derive(Clone, Default)] -struct SymbolStatus { - alias: Option, - conflicting: bool, -} - -pub(super) fn extract_simple_aliases( - syntax_grammar: &mut SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> AliasMap { - // Determine which symbols in the grammars are *always* aliased to a single name. 
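For contrast with the deleted `extract_simple_aliases` pass that begins here: the old rule was all-or-nothing, extracting a global alias only when every occurrence of a symbol agreed on it. A compact model of that old rule, under the same simplified string-based types as the earlier sketches (the function name and inputs are illustrative):

```rust
// Old "simple alias" rule: a symbol gets a global alias only if *every*
// occurrence is aliased, and always to the same name.
fn simple_alias<'a>(occurrences: &[Option<&'a str>]) -> Option<&'a str> {
    let mut result = None;
    for occurrence in occurrences {
        match (*occurrence, result) {
            (None, _) => return None,            // appeared unaliased: no alias
            (Some(a), None) => result = Some(a), // first alias seen
            (Some(a), Some(r)) if a != r => return None, // conflicting aliases
            _ => {}
        }
    }
    result
}

fn main() {
    assert_eq!(simple_alias(&[Some("a1"), Some("a1")]), Some("a1"));
    assert_eq!(simple_alias(&[Some("a3"), Some("a4")]), None); // conflict
    assert_eq!(simple_alias(&[Some("a2"), None]), None); // sometimes unaliased
    println!("ok");
}
```

The new pass above resolves both of the `None` cases less conservatively: a conflict is settled by frequency (first occurrence on a tie), and only a genuinely unaliased appearance disqualifies a symbol.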
- let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - let mut non_terminal_status_list = - vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - let mut external_status_list = - vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &mut external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &mut non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &mut terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if step.alias.is_none() { - status.alias = None; - status.conflicting = true; - } - - if !status.conflicting { - if status.alias.is_none() { - status.alias = step.alias.clone(); - } else if status.alias != step.alias { - status.alias = None; - status.conflicting = true; - } - } - } - } - } - - // Remove the aliases for those symbols. - for variable in syntax_grammar.variables.iter_mut() { - for production in variable.productions.iter_mut() { - for step in production.steps.iter_mut() { - let status = match step.symbol { - Symbol { - kind: SymbolType::External, - index, - } => &external_status_list[index], - Symbol { - kind: SymbolType::NonTerminal, - index, - } => &non_terminal_status_list[index], - Symbol { - kind: SymbolType::Terminal, - index, - } => &terminal_status_list[index], - Symbol { - kind: SymbolType::End, - .. - } => panic!("Unexpected end token"), - }; - - if status.alias.is_some() { - step.alias = None; - } - } - } - } - - // Populate a map of the symbols to their aliases. - let mut result = AliasMap::new(); - for (i, status) in terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::terminal(i), alias); - } - } - for (i, status) in non_terminal_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::non_terminal(i), alias); - } - } - for (i, status) in external_status_list.into_iter().enumerate() { - if let Some(alias) = status.alias { - result.insert(Symbol::external(i), alias); - } - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - use crate::generate::nfa::Nfa; - - #[test] - fn test_extract_simple_aliases() { - let mut syntax_grammar = SyntaxGrammar { - variables: vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - }], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // Token 0 is always aliased as "a1". - ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - // Token 1 is aliased above, but not here. - ProductionStep::new(Symbol::terminal(1)), - // Token 2 is aliased differently than above. 
- ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - }], - }, - ], - extra_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), - word_token: None, - }; - - let lexical_grammar = LexicalGrammar { - nfa: Nfa::new(), - variables: vec![ - LexicalVariable { - name: "t1".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t2".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - LexicalVariable { - name: "t3".to_string(), - kind: VariableType::Anonymous, - implicit_precedence: 0, - start_state: 0, - }, - ], - }; - - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); - assert_eq!(simple_aliases.len(), 1); - assert_eq!( - simple_aliases[&Symbol::terminal(0)], - Alias { - value: "a1".to_string(), - is_named: true, - } - ); - - assert_eq!( - syntax_grammar.variables, - vec![ - SyntaxVariable { - name: "v1".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - // 'Simple' alias removed - ProductionStep::new(Symbol::terminal(0)), - // Other aliases unchanged - ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - ], - },], - }, - SyntaxVariable { - name: "v2".to_owned(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(1)), - ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true), - ], - },], - }, - ] - ); - } -} diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 7cd411ef6b..276f13ff0b 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -73,6 +73,12 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + for (i, variable) in variables.iter_mut().enumerate() { + if supertype_symbols.contains(&Symbol::non_terminal(i)) { + variable.kind = VariableType::Hidden; + } + } + Ok(InternedGrammar { variables, external_tokens, diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 029483d37e..8b094c562d 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -1,6 +1,6 @@ mod expand_repeats; mod expand_tokens; -mod extract_simple_aliases; +mod extract_default_aliases; mod extract_tokens; mod flatten_grammar; mod intern_symbols; @@ -8,7 +8,7 @@ mod process_inlines; use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; -use self::extract_simple_aliases::extract_simple_aliases; +use self::extract_default_aliases::extract_default_aliases; use self::extract_tokens::extract_tokens; use self::flatten_grammar::flatten_grammar; use self::intern_symbols::intern_symbols; @@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar( let syntax_grammar = expand_repeats(syntax_grammar); let mut syntax_grammar = flatten_grammar(syntax_grammar)?; let lexical_grammar = expand_tokens(lexical_grammar)?; - let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar); + let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); let inlines = 
process_inlines(&syntax_grammar); - Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases)) + Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 9ef89d75c4..f83658b2f4 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder { last_inserted_step.associativity = removed_step.associativity; } } + if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { + production.dynamic_precedence = p.dynamic_precedence; + } production }), ); @@ -226,7 +229,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], @@ -258,7 +261,7 @@ mod tests { ], }, Production { - dynamic_precedence: 0, + dynamic_precedence: -2, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index e8c59d07b3..58d99cc452 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -7,7 +7,7 @@ use super::tables::{ }; use core::ops::Range; use std::cmp; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; @@ -15,6 +15,8 @@ use std::mem::swap; // stabilized, and the parser generation does not use it by default. const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1; +const LARGE_CHARACTER_RANGE_COUNT: usize = 8; + macro_rules! add { ($this: tt, $($arg: tt)*) => {{ $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); @@ -63,15 +65,29 @@ struct Generator { keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, symbol_order: HashMap, symbol_ids: HashMap, alias_ids: HashMap, - alias_map: BTreeMap>, + unique_aliases: Vec, + symbol_map: HashMap, field_names: Vec, next_abi: bool, } +struct TransitionSummary { + is_included: bool, + ranges: Vec>, + call_id: Option, +} + +struct LargeCharacterSetInfo { + ranges: Vec>, + symbol: Symbol, + index: usize, + usage_count: usize, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -80,11 +96,7 @@ impl Generator { self.add_stats(); self.add_symbol_enum(); self.add_symbol_names_list(); - - if self.next_abi { - self.add_unique_symbol_map(); - } - + self.add_unique_symbol_map(); self.add_symbol_metadata_list(); if !self.field_names.is_empty() { @@ -97,14 +109,18 @@ impl Generator { self.add_alias_sequences(); } + if self.next_abi { + self.add_non_terminal_alias_map(); + } + let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); - self.add_lex_function("ts_lex", main_lex_table); + self.add_lex_function("ts_lex", main_lex_table, true); if self.keyword_capture_token.is_some() { let mut keyword_lex_table = LexTable::default(); swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - self.add_lex_function("ts_lex_keywords", keyword_lex_table); + self.add_lex_function("ts_lex_keywords", keyword_lex_table, false); } self.add_lex_modes_list(); @@ -127,55 +143,105 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - let mut field_names = Vec::new(); + self.symbol_map = self + .parse_table + .symbols + .iter() 
+ .map(|symbol| { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { + mapping = other_symbol; + } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; + } + } + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + mapping = other_symbol; + break; + } + } + } + + (*symbol, *mapping) + }) + .collect(); + for production_info in &self.parse_table.production_infos { + // Build a list of all field names for field_name in production_info.field_map.keys() { - field_names.push(field_name); + if let Err(i) = self.field_names.binary_search(&field_name) { + self.field_names.insert(i, field_name.clone()); + } } for alias in &production_info.alias_sequence { + // Generate a mapping from aliases to C identifiers. if let Some(alias) = &alias { - let alias_kind = alias.kind(); - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind + let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + if let Some(default_alias) = self.default_aliases.get(symbol) { + default_alias == alias + } else { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + } }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; + + // Some aliases match an existing symbol in the grammar. + let alias_id; + if let Some(existing_symbol) = existing_symbol { + alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + } + // Other aliases don't match any existing symbol, and need their own identifiers. + else { + if let Err(i) = self.unique_aliases.binary_search(alias) { + self.unique_aliases.insert(i, alias.clone()); + } + + alias_id = if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + } + self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); } } } - field_names.sort_unstable(); - field_names.dedup(); - self.field_names = field_names.into_iter().cloned().collect(); - - // If we are opting in to the new unstable language ABI, then use the concept of - // "small parse states". 
Otherwise, use the same representation for all parse - // states. - if self.next_abi { - let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); - self.large_state_count = self - .parse_table - .states - .iter() - .enumerate() - .take_while(|(i, s)| { - *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold - }) - .count(); - } else { - self.large_state_count = self.parse_table.states.len(); - } + // Determine which states should use the "small state" representation, and which should + // use the normal array representation. + let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); + self.large_state_count = self + .parse_table + .states + .iter() + .enumerate() + .take_while(|(i, s)| { + *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold + }) + .count(); } fn add_includes(&mut self) { @@ -241,21 +307,14 @@ impl Generator { "#define STATE_COUNT {}", self.parse_table.states.len() ); - - if self.next_abi { - add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); - } + add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); add_line!( self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!( - self, - "#define ALIAS_COUNT {}", - self.alias_map.iter().filter(|e| e.1.is_none()).count() - ); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -283,11 +342,9 @@ impl Generator { i += 1; } } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); - i += 1; - } + for alias in &self.unique_aliases { + add_line!(self, "{} = {},", self.alias_ids[&alias], i); + i += 1; } dedent!(self); add_line!(self, "}};"); @@ -299,22 +356,20 @@ impl Generator { indent!(self); for symbol in self.parse_table.symbols.iter() { let name = self.sanitize_string( - self.simple_aliases + self.default_aliases .get(symbol) .map(|alias| alias.value.as_str()) .unwrap_or(self.metadata_for_symbol(*symbol).0), ); add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = \"{}\",", - self.alias_ids[&alias], - self.sanitize_string(&alias.value) - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = \"{}\",", + self.alias_ids[&alias], + self.sanitize_string(&alias.value) + ); } dedent!(self); add_line!(self, "}};"); @@ -325,58 +380,21 @@ impl Generator { add_line!(self, "static TSSymbol ts_symbol_map[] = {{"); indent!(self); for symbol in &self.parse_table.symbols { - let mut mapping = symbol; - - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. 
- if let Some(alias) = self.simple_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.simple_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; - } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - mapping = other_symbol; - break; - } - } - } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; - } - } - } - add_line!( self, "[{}] = {},", - self.symbol_ids[&symbol], - self.symbol_ids[mapping], + self.symbol_ids[symbol], + self.symbol_ids[&self.symbol_map[symbol]], ); } - for (alias, symbol) in &self.alias_map { - if symbol.is_none() { - add_line!( - self, - "[{}] = {},", - self.alias_ids[&alias], - self.alias_ids[&alias], - ); - } + for alias in &self.unique_aliases { + add_line!( + self, + "[{}] = {},", + self.alias_ids[&alias], + self.alias_ids[&alias], + ); } dedent!(self); @@ -421,7 +439,7 @@ impl Generator { for symbol in &self.parse_table.symbols { add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); indent!(self); - if let Some(Alias { is_named, .. }) = self.simple_aliases.get(symbol) { + if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", is_named); } else { @@ -437,6 +455,9 @@ impl Generator { VariableType::Hidden => { add_line!(self, ".visible = false,"); add_line!(self, ".named = true,"); + if self.syntax_grammar.supertype_symbols.contains(symbol) { + add_line!(self, ".supertype = true,"); + } } VariableType::Auxiliary => { add_line!(self, ".visible = false,"); @@ -447,15 +468,13 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - for (alias, matching_symbol) in &self.alias_map { - if matching_symbol.is_none() { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); - indent!(self); - add_line!(self, ".visible = true,"); - add_line!(self, ".named = {},", alias.is_named); - dedent!(self); - add_line!(self, "}},"); - } + for alias in &self.unique_aliases { + add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + indent!(self); + add_line!(self, ".visible = true,"); + add_line!(self, ".named = {},", alias.is_named); + dedent!(self); + add_line!(self, "}},"); } dedent!(self); add_line!(self, "}};"); @@ -494,6 +513,53 @@ impl Generator { add_line!(self, ""); } + fn add_non_terminal_alias_map(&mut self) { + let mut alias_ids_by_symbol = HashMap::new(); + for variable in &self.syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if let Some(alias) = &step.alias { + if step.symbol.is_non_terminal() + && Some(alias) != self.default_aliases.get(&step.symbol) + { + if self.symbol_ids.contains_key(&step.symbol) { + if let Some(alias_id) = self.alias_ids.get(&alias) { + let alias_ids = alias_ids_by_symbol + .entry(step.symbol) + .or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); + } + } + } + } + } + } + } + } + + let mut alias_ids_by_symbol = 
alias_ids_by_symbol.iter().collect::>(); + alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); + + add_line!(self, "static uint16_t ts_non_terminal_alias_map[] = {{"); + indent!(self); + for (symbol, alias_ids) in alias_ids_by_symbol { + let symbol_id = &self.symbol_ids[symbol]; + let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; + add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); + indent!(self); + add_line!(self, "{},", public_symbol_id); + for alias_id in alias_ids { + add_line!(self, "{},", alias_id); + } + dedent!(self); + } + add_line!(self, "0,"); + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -570,28 +636,120 @@ impl Generator { add_line!(self, ""); } - fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { + fn add_lex_function( + &mut self, + name: &str, + lex_table: LexTable, + extract_helper_functions: bool, + ) { + let mut ruled_out_chars = HashSet::new(); + let mut large_character_sets = Vec::::new(); + + // For each lex state, compute a summary of the code that needs to be + // generated. + let state_transition_summaries: Vec> = lex_table + .states + .iter() + .map(|state| { + ruled_out_chars.clear(); + + // For each state transition, compute the set of character ranges + // that need to be checked. + state + .advance_actions + .iter() + .map(|(chars, action)| { + let (chars, is_included) = match chars { + CharacterSet::Include(c) => (c, true), + CharacterSet::Exclude(c) => (c, false), + }; + let mut call_id = None; + let mut ranges = + CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + if is_included { + ruled_out_chars.extend(chars.iter().map(|c| *c as u32)); + } else { + ranges.insert(0, '\0'..'\0') + } + + // Record any large character sets so that they can be extracted + // into helper functions, reducing code duplication. + if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { + let char_set_symbol = self + .symbol_for_advance_action(action, &lex_table) + .expect("No symbol for lex state"); + let mut count_for_symbol = 0; + for (i, info) in large_character_sets.iter_mut().enumerate() { + if info.ranges == ranges { + call_id = Some(i); + info.usage_count += 1; + break; + } + if info.symbol == char_set_symbol { + count_for_symbol += 1; + } + } + if call_id.is_none() { + call_id = Some(large_character_sets.len()); + large_character_sets.push(LargeCharacterSetInfo { + symbol: char_set_symbol, + index: count_for_symbol + 1, + ranges: ranges.clone(), + usage_count: 1, + }); + } + } + + TransitionSummary { + is_included, + ranges, + call_id, + } + }) + .collect() + }) + .collect(); + + // Generate a helper function for each large character set. 
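The helper functions noted above operate on the range lists computed in the transition summaries: each transition's character set is reduced to sorted ranges with previously matched characters ruled out, so a later check can merge across gaps that earlier transitions already consumed. The following is a standalone model of that range-building, illustrating the intent of the `CharacterSet::ranges(chars, &ruled_out_chars)` call above rather than the crate's implementation; ranges are inclusive of `end`, following this file's convention, and `chars` is assumed sorted as in the lexer tables.

```rust
use std::collections::HashSet;
use std::ops::Range;

// Build inclusive character ranges from a sorted char list, skipping and
// merging across characters that earlier transitions already ruled out.
fn ranges(chars: &[char], ruled_out: &HashSet<u32>) -> Vec<Range<char>> {
    let mut result: Vec<Range<char>> = Vec::new();
    for &c in chars {
        if ruled_out.contains(&(c as u32)) {
            continue;
        }
        // Extend the previous range when every codepoint between its end and
        // `c` was ruled out; otherwise start a new single-character range.
        let extend = result.last().map_or(false, |last| {
            (last.end as u32 + 1..c as u32).all(|p| ruled_out.contains(&p))
        });
        if extend {
            result.last_mut().unwrap().end = c;
        } else {
            result.push(c..c);
        }
    }
    result
}

fn main() {
    let ruled_out: HashSet<u32> = ['d', 'f', 'g'].iter().map(|c| *c as u32).collect();
    // 'a'..'c' absorbs 'e' and 'h' because 'd', 'f', and 'g' can no longer match.
    assert_eq!(
        ranges(&['a', 'b', 'c', 'e', 'h', 'z'], &ruled_out),
        vec!['a'..'h', 'z'..'z']
    );
    println!("ok");
}
```

The worked example here is taken from the unit tests of the `get_ranges` helper that this diff deletes at the end of render.rs; the behavior carries over, only its home changes.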
+ let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); + sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); + for info in sorted_large_char_sets { + if info.usage_count > 1 { + add_line!( + self, + "static inline bool {}_character_set_{}(int32_t lookahead) {{", + self.symbol_ids[&info.symbol], + info.index + ); + indent!(self); + add_line!(self, "return"); + indent!(self); + add_whitespace!(self); + self.add_character_range_conditions(&info.ranges, true, 0); + add!(self, ";\n"); + dedent!(self); + dedent!(self); + add_line!(self, "}}"); + add_line!(self, ""); + } + } + add_line!( self, "static bool {}(TSLexer *lexer, TSStateId state) {{", name ); indent!(self); - add_line!(self, "START_LEXER();"); - - if self.next_abi { - add_line!(self, "eof = lexer->eof(lexer);"); - } else { - add_line!(self, "eof = lookahead == 0;"); - } + add_line!(self, "START_LEXER();"); + add_line!(self, "eof = lexer->eof(lexer);"); add_line!(self, "switch (state) {{"); - indent!(self); + indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { add_line!(self, "case {}:", i); indent!(self); - self.add_lex_state(state); + self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); dedent!(self); } @@ -602,12 +760,41 @@ impl Generator { dedent!(self); add_line!(self, "}}"); + dedent!(self); add_line!(self, "}}"); add_line!(self, ""); } - fn add_lex_state(&mut self, state: LexState) { + fn symbol_for_advance_action( + &self, + action: &AdvanceAction, + lex_table: &LexTable, + ) -> Option { + let mut state_ids = vec![action.state]; + let mut i = 0; + while i < state_ids.len() { + let id = state_ids[i]; + let state = &lex_table.states[id]; + if let Some(accept) = state.accept_action { + return Some(accept); + } + for (_, action) in &state.advance_actions { + if !state_ids.contains(&action.state) { + state_ids.push(action.state); + } + } + i += 1; + } + return None; + } + + fn add_lex_state( + &mut self, + state: LexState, + transition_info: &Vec, + large_character_sets: &Vec, + ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } @@ -616,92 +803,95 @@ impl Generator { add_line!(self, "if (eof) ADVANCE({});", eof_action.state); } - let mut ruled_out_characters = HashSet::new(); - for (characters, action) in state.advance_actions { - let previous_length = self.buffer.len(); - + for (i, (_, action)) in state.advance_actions.into_iter().enumerate() { + let transition = &transition_info[i]; add_whitespace!(self); - add!(self, "if ("); - if self.add_character_set_condition(&characters, &ruled_out_characters) { - add!(self, ") "); - self.add_advance_action(&action); - if let CharacterSet::Include(chars) = characters { - ruled_out_characters.extend(chars.iter().map(|c| *c as u32)); + + // If there is a helper function for this transition's character + // set, then generate a call to that helper function. 
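The call-site logic that follows consults the same `large_character_sets` list built earlier: identical range sets share one entry (the `call_id`), and only entries reaching `usage_count > 1` had a helper emitted above. A reduced sketch of that find-or-register bookkeeping, with simplified types; the real `LargeCharacterSetInfo` also records the accepting symbol and a per-symbol index used to name the helper.

```rust
// Simplified stand-in for `LargeCharacterSetInfo`.
struct HelperInfo {
    ranges: Vec<(char, char)>, // inclusive (start, end) pairs
    usage_count: usize,
}

// Return the index (call id) of the helper covering `ranges`, registering
// a new entry on first use and bumping the count on reuse.
fn find_or_add(helpers: &mut Vec<HelperInfo>, ranges: &[(char, char)]) -> usize {
    if let Some(i) = helpers.iter().position(|h| h.ranges == ranges) {
        helpers[i].usage_count += 1;
        i
    } else {
        helpers.push(HelperInfo { ranges: ranges.to_vec(), usage_count: 1 });
        helpers.len() - 1
    }
}

fn main() {
    let mut helpers = Vec::new();
    let identifier_chars = [('a', 'z'), ('A', 'Z'), ('0', '9'), ('_', '_')];
    let digit_chars = [('0', '9')];
    find_or_add(&mut helpers, &identifier_chars);
    find_or_add(&mut helpers, &digit_chars);
    find_or_add(&mut helpers, &identifier_chars); // second user: same call id
    // Only the identifier set (usage_count == 2) would get a helper function;
    // single-use sets fall through to inline range comparisons.
    let emitted: Vec<usize> = helpers
        .iter()
        .enumerate()
        .filter(|(_, h)| h.usage_count > 1)
        .map(|(i, _)| i)
        .collect();
    assert_eq!(emitted, vec![0]);
    println!("ok");
}
```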
+ if let Some(call_id) = transition.call_id { + let info = &large_character_sets[call_id]; + if info.usage_count > 1 { + add!(self, "if ("); + if !transition.is_included { + add!(self, "!"); + } + add!( + self, + "{}_character_set_{}(lookahead)) ", + self.symbol_ids[&info.symbol], + info.index + ); + self.add_advance_action(&action); + add!(self, "\n"); + continue; } - } else { - self.buffer.truncate(previous_length); - self.add_advance_action(&action); } + + // Otherwise, generate code to compare the lookahead character + // with all of the character ranges. + if transition.ranges.len() > 0 { + add!(self, "if ("); + self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); + add!(self, ") "); + } + self.add_advance_action(&action); add!(self, "\n"); } add_line!(self, "END_STATE();"); } - fn add_character_set_condition( + fn add_character_range_conditions( &mut self, - characters: &CharacterSet, - ruled_out_characters: &HashSet, + ranges: &[Range], + is_included: bool, + indent_count: usize, ) -> bool { - match characters { - CharacterSet::Include(chars) => { - let ranges = Self::get_ranges(chars, ruled_out_characters); - self.add_character_range_conditions(ranges, false) - } - CharacterSet::Exclude(chars) => { - let ranges = Some('\0'..'\0') - .into_iter() - .chain(Self::get_ranges(chars, ruled_out_characters)); - self.add_character_range_conditions(ranges, true) - } + let mut line_break = "\n".to_string(); + for _ in 0..self.indent_level + indent_count { + line_break.push_str(" "); } - } - fn add_character_range_conditions( - &mut self, - ranges: impl Iterator>, - is_negated: bool, - ) -> bool { - let line_break = "\n "; let mut did_add = false; for range in ranges { - if is_negated { + if is_included { if did_add { - add!(self, " &&{}", line_break); + add!(self, " ||{}", line_break); } if range.end == range.start { - add!(self, "lookahead != "); + add!(self, "lookahead == "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead != "); + add!(self, "lookahead == "); self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); + add!(self, " ||{}lookahead == ", line_break); self.add_character(range.end); } else { - add!(self, "(lookahead < "); + add!(self, "("); self.add_character(range.start); - add!(self, " || "); + add!(self, " <= lookahead && lookahead <= "); self.add_character(range.end); - add!(self, " < lookahead)"); + add!(self, ")"); } } else { if did_add { - add!(self, " ||{}", line_break); + add!(self, " &&{}", line_break); } if range.end == range.start { - add!(self, "lookahead == "); + add!(self, "lookahead != "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { - add!(self, "lookahead == "); + add!(self, "lookahead != "); self.add_character(range.start); - add!(self, " ||{}lookahead == ", line_break); + add!(self, " &&{}lookahead != ", line_break); self.add_character(range.end); } else { - add!(self, "("); + add!(self, "(lookahead < "); self.add_character(range.start); - add!(self, " <= lookahead && lookahead <= "); + add!(self, " || "); self.add_character(range.end); - add!(self, ")"); + add!(self, " < lookahead)"); } } did_add = true; @@ -709,40 +899,6 @@ impl Generator { did_add } - fn get_ranges<'a>( - chars: &'a Vec, - ruled_out_characters: &'a HashSet, - ) -> impl Iterator> + 'a { - let mut prev_range: Option> = None; - chars - .iter() - .map(|c| (*c, false)) - .chain(Some(('\0', true))) - .filter_map(move |(c, done)| { - if 
done { - return prev_range.clone(); - } - if ruled_out_characters.contains(&(c as u32)) { - return None; - } - if let Some(range) = prev_range.clone() { - let mut prev_range_successor = range.end as u32 + 1; - while prev_range_successor < c as u32 { - if !ruled_out_characters.contains(&prev_range_successor) { - prev_range = Some(c..c); - return Some(range); - } - prev_range_successor += 1; - } - prev_range = Some(range.start..c); - None - } else { - prev_range = Some(c..c); - None - } - }) - } - fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { add!(self, "ADVANCE({});", action.state); @@ -759,7 +915,7 @@ impl Generator { && state.terminal_entries.len() == 1 && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() { - add_line!(self, "[{}] = {{-1}},", i,); + add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); } else if state.external_lex_state_id > 0 { add_line!( self, @@ -858,12 +1014,7 @@ impl Generator { add_line!( self, - "static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{", - if self.next_abi { - "LARGE_STATE_COUNT" - } else { - "STATE_COUNT" - } + "static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{", ); indent!(self); @@ -1022,7 +1173,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.count = {}, .reusable = {}}},", + " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", i, entry.actions.len(), entry.reusable @@ -1115,35 +1266,12 @@ impl Generator { add_line!(self, ".symbol_count = SYMBOL_COUNT,"); add_line!(self, ".alias_count = ALIAS_COUNT,"); add_line!(self, ".token_count = TOKEN_COUNT,"); - - if self.next_abi { - add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); - } - + add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); + add_line!(self, ".symbol_names = ts_symbol_names,"); add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); - add_line!( - self, - ".parse_table = (const unsigned short *)ts_parse_table," - ); - - if self.large_state_count < self.parse_table.states.len() { - add_line!( - self, - ".small_parse_table = (const uint16_t *)ts_small_parse_table," - ); - add_line!( - self, - ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," - ); - } - + add_line!(self, ".parse_table = (const uint16_t *)ts_parse_table,"); add_line!(self, ".parse_actions = ts_parse_actions,"); add_line!(self, ".lex_modes = ts_lex_modes,"); - add_line!(self, ".symbol_names = ts_symbol_names,"); - - if self.next_abi { - add_line!(self, ".public_symbol_map = ts_symbol_map,"); - } if !self.parse_table.production_infos.is_empty() { add_line!( @@ -1151,27 +1279,12 @@ impl Generator { ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); } - - add_line!(self, ".field_count = FIELD_COUNT,"); - - if !self.field_names.is_empty() { - add_line!(self, ".field_names = ts_field_names,"); - add_line!( - self, - ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," - ); - add_line!( - self, - ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," - ); - } - add_line!( self, ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," ); - add_line!(self, ".lex_fn = ts_lex,"); + add_line!(self, ".lex_fn = ts_lex,"); if let Some(keyword_capture_token) = self.keyword_capture_token { add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); add_line!( @@ -1181,8 +1294,6 @@ impl Generator { ); } - add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); - if !self.syntax_grammar.external_tokens.is_empty() { add_line!(self, 
".external_scanner = {{"); indent!(self); @@ -1196,8 +1307,40 @@ impl Generator { dedent!(self); add_line!(self, "}},"); } - dedent!(self); + add_line!(self, ".field_count = FIELD_COUNT,"); + if !self.field_names.is_empty() { + add_line!( + self, + ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," + ); + add_line!( + self, + ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," + ); + add_line!(self, ".field_names = ts_field_names,"); + } + + add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); + if self.large_state_count < self.parse_table.states.len() { + add_line!( + self, + ".small_parse_table = (const uint16_t *)ts_small_parse_table," + ); + add_line!( + self, + ".small_parse_table_map = (const uint32_t *)ts_small_parse_table_map," + ); + } + + add_line!(self, ".public_symbol_map = ts_symbol_map,"); + + if self.next_abi { + add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); + add_line!(self, ".state_count = STATE_COUNT,"); + } + + dedent!(self); add_line!(self, "}};"); add_line!(self, "return &language;"); dedent!(self); @@ -1357,6 +1500,7 @@ impl Generator { for c in name.chars() { match c { '\"' => result += "\\\"", + '?' => result += "\\?", '\\' => result += "\\\\", '\u{000c}' => result += "\\f", '\n' => result += "\\n", @@ -1399,7 +1543,7 @@ impl Generator { /// for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `simple_aliases` - A map describing the global rename rules that should apply. +/// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. 
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse @@ -1412,7 +1556,7 @@ pub(crate) fn render_c_code( keyword_capture_token: Option, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - simple_aliases: AliasMap, + default_aliases: AliasMap, next_abi: bool, ) -> String { Generator { @@ -1426,59 +1570,14 @@ pub(crate) fn render_c_code( keyword_capture_token, syntax_grammar, lexical_grammar, - simple_aliases, + default_aliases, symbol_ids: HashMap::new(), symbol_order: HashMap::new(), alias_ids: HashMap::new(), - alias_map: BTreeMap::new(), + symbol_map: HashMap::new(), + unique_aliases: Vec::new(), field_names: Vec::new(), next_abi, } .generate() } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_char_ranges() { - struct Row { - chars: Vec, - ruled_out_chars: Vec, - expected_ranges: Vec>, - } - - let table = [ - Row { - chars: vec!['a'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'a'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'z'], - ruled_out_chars: vec![], - expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], - }, - Row { - chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], - ruled_out_chars: vec!['d', 'f', 'g'], - expected_ranges: vec!['a'..'h', 'z'..'z'], - }, - ]; - - for Row { - chars, - ruled_out_chars, - expected_ranges, - } in table.iter() - { - let ruled_out_chars = ruled_out_chars - .into_iter() - .map(|c: &char| *c as u32) - .collect(); - let ranges = Generator::get_ranges(chars, &ruled_out_chars).collect::>(); - assert_eq!(ranges, *expected_ranges); - } - } -} diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index c6b1193d65..330c9e57d3 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -7,6 +7,7 @@ use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; use std::collections::HashMap; +use std::sync::atomic::AtomicUsize; use std::time::Instant; use std::{fs, io, path, str, usize}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; @@ -278,14 +279,14 @@ pub fn ansi( source: &[u8], config: &HighlightConfiguration, print_time: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); let time = Instant::now(); - let cancellation_flag = util::cancel_on_stdin(); let mut highlighter = Highlighter::new(); - let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + let events = highlighter.highlight(config, source, cancellation_flag, |string| { loader.highlight_config_for_injection_string(string) })?; @@ -320,6 +321,7 @@ pub fn html( theme: &Theme, source: &[u8], config: &HighlightConfiguration, + quiet: bool, print_time: bool, ) -> Result<()> { use std::io::Write; @@ -343,17 +345,19 @@ pub fn html( } })?; - write!(&mut stdout, "\n")?; - for (i, line) in renderer.lines().enumerate() { - write!( - &mut stdout, - "\n", - i + 1, - line - )?; - } + if !quiet { + write!(&mut stdout, "
<table>\n")?; + for (i, line) in renderer.lines().enumerate() { + write!( + &mut stdout, + "<tr><td class=line-number>{}</td><td class=line>{}</td></tr>
\n")?; + write!(&mut stdout, "\n")?; + } if print_time { eprintln!("Time: {}ms", time.elapsed().as_millis()); diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 97c288a17c..e00323b78b 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -6,6 +6,7 @@ pub mod loader; pub mod logger; pub mod parse; pub mod query; +pub mod query_testing; pub mod tags; pub mod test; pub mod test_highlight; diff --git a/cli/src/loader.rs b/cli/src/loader.rs index cf2eb1432b..3d5a937700 100644 --- a/cli/src/loader.rs +++ b/cli/src/loader.rs @@ -5,14 +5,15 @@ use regex::{Regex, RegexBuilder}; use serde_derive::Deserialize; use std::collections::HashMap; use std::io::BufReader; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Mutex; use std::time::SystemTime; use std::{fs, mem}; -use tree_sitter::Language; +use tree_sitter::{Language, QueryError}; use tree_sitter_highlight::HighlightConfiguration; -use tree_sitter_tags::TagsConfiguration; +use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; #[cfg(unix)] const DYLIB_EXTENSION: &'static str = "so"; @@ -160,7 +161,9 @@ impl Loader { // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. else { - let file_contents = fs::read_to_string(path)?; + let file_contents = fs::read(path) + .map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?; + let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; for configuration_id in configuration_ids { @@ -541,13 +544,15 @@ impl Loader { impl<'a> LanguageConfiguration<'a> { pub fn highlight_config(&self, language: Language) -> Result> { - self.highlight_config + return self + .highlight_config .get_or_try_init(|| { - let highlights_query = + let (highlights_query, highlight_ranges) = self.read_queries(&self.highlights_filenames, "highlights.scm")?; - let injections_query = + let (injections_query, injection_ranges) = self.read_queries(&self.injections_filenames, "injections.scm")?; - let locals_query = self.read_queries(&self.locals_filenames, "locals.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; if highlights_query.is_empty() { Ok(None) @@ -558,9 +563,30 @@ impl<'a> LanguageConfiguration<'a> { &injections_query, &locals_query, ) - .map_err(Error::wrap(|| { - format!("Failed to load queries in {:?}", self.root_path) - }))?; + .map_err(|error| { + if error.offset < injections_query.len() { + Self::include_path_in_query_error( + error, + &injection_ranges, + &injections_query, + 0, + ) + } else if error.offset < injections_query.len() + locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + injections_query.len(), + ) + } else { + Self::include_path_in_query_error( + error, + &highlight_ranges, + &highlights_query, + injections_query.len() + locals_query.len(), + ) + } + })?; let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { @@ -573,48 +599,95 @@ impl<'a> LanguageConfiguration<'a> { Ok(Some(result)) } }) - .map(Option::as_ref) + .map(Option::as_ref); } pub fn tags_config(&self, language: Language) -> Result> { self.tags_config .get_or_try_init(|| { - let tags_query = self.read_queries(&self.tags_filenames, "tags.scm")?; - let locals_query = self.read_queries(&self.locals_filenames, "locals.scm")?; + let 
(tags_query, tags_ranges) = + self.read_queries(&self.tags_filenames, "tags.scm")?; + let (locals_query, locals_ranges) = + self.read_queries(&self.locals_filenames, "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { TagsConfiguration::new(language, &tags_query, &locals_query) - .map_err(Error::wrap(|| { - format!("Failed to load queries in {:?}", self.root_path) - })) - .map(|config| Some(config)) + .map(Some) + .map_err(|error| { + if let TagsError::Query(error) = error { + if error.offset < locals_query.len() { + Self::include_path_in_query_error( + error, + &locals_ranges, + &locals_query, + 0, + ) + } else { + Self::include_path_in_query_error( + error, + &tags_ranges, + &tags_query, + locals_query.len(), + ) + } + .into() + } else { + error.into() + } + }) } }) .map(Option::as_ref) } - fn read_queries(&self, paths: &Option>, default_path: &str) -> Result { + fn include_path_in_query_error<'b>( + mut error: QueryError, + ranges: &'b Vec<(String, Range)>, + source: &str, + start_offset: usize, + ) -> (&'b str, QueryError) { + let offset_within_section = error.offset - start_offset; + let (path, range) = ranges + .iter() + .find(|(_, range)| range.contains(&offset_within_section)) + .unwrap(); + error.offset = offset_within_section - range.start; + error.row = source[range.start..offset_within_section] + .chars() + .filter(|c| *c == '\n') + .count(); + (path.as_ref(), error) + } + + fn read_queries( + &self, + paths: &Option>, + default_path: &str, + ) -> Result<(String, Vec<(String, Range)>)> { + let mut query = String::new(); + let mut path_ranges = Vec::new(); if let Some(paths) = paths.as_ref() { - let mut query = String::new(); for path in paths { - let path = self.root_path.join(path); - query += &fs::read_to_string(&path).map_err(Error::wrap(|| { + let abs_path = self.root_path.join(path); + let prev_query_len = query.len(); + query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| { format!("Failed to read query file {:?}", path) }))?; + path_ranges.push((path.clone(), prev_query_len..query.len())); } - Ok(query) } else { let queries_path = self.root_path.join("queries"); let path = queries_path.join(default_path); if path.exists() { - fs::read_to_string(&path).map_err(Error::wrap(|| { + query = fs::read_to_string(&path).map_err(Error::wrap(|| { format!("Failed to read query file {:?}", path) - })) - } else { - Ok(String::new()) + }))?; + path_ranges.push((default_path.to_string(), 0..query.len())); } } + + Ok((query, path_ranges)) } } diff --git a/cli/src/main.rs b/cli/src/main.rs index c5c0e0e07c..1cf90c67c6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -7,7 +7,7 @@ use std::{env, fs, u64}; use tree_sitter::Language; use tree_sitter_cli::{ config, error, generate, highlight, loader, logger, parse, query, tags, test, test_highlight, - wasm, web_ui, + util, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -53,18 +53,20 @@ fn run() -> error::Result<()> { .subcommand( SubCommand::with_name("parse") .about("Parse files") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) + .arg(Arg::with_name("debug-xml").long("xml").short("x")) .arg(Arg::with_name("quiet").long("quiet").short("q")) + 
.arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("allow-cancellation").long("cancel")) .arg(Arg::with_name("timeout").long("timeout").takes_value(true)) .arg( Arg::with_name("edits") @@ -79,37 +81,34 @@ fn run() -> error::Result<()> { SubCommand::with_name("query") .about("Search files using a syntax tree query") .arg(Arg::with_name("query-path").index(1).required(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(2) .multiple(true) - .required(true), + .required(false), + ) + .arg( + Arg::with_name("byte-range") + .help("The range of byte offsets in which the query will be executed") + .long("byte-range") + .takes_value(true), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) - .arg(Arg::with_name("captures").long("captures").short("c")), + .arg(Arg::with_name("captures").long("captures").short("c")) + .arg(Arg::with_name("test").long("test")), ) .subcommand( SubCommand::with_name("tags") - .arg( - Arg::with_name("format") - .short("f") - .long("format") - .value_name("json|protobuf") - .help("Determine output format (default: json)"), - ) + .arg(Arg::with_name("quiet").long("quiet").short("q")) + .arg(Arg::with_name("time").long("time").short("t")) .arg(Arg::with_name("scope").long("scope").takes_value(true)) + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("inputs") + Arg::with_name("paths") .help("The source file to use") .index(1) - .required(true) .multiple(true), - ) - .arg( - Arg::with_name("v") - .short("v") - .multiple(true) - .help("Sets the level of verbosity"), ), ) .subcommand( @@ -121,22 +120,24 @@ fn run() -> error::Result<()> { .short("f") .takes_value(true), ) + .arg(Arg::with_name("update").long("update").short("u")) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) .subcommand( SubCommand::with_name("highlight") .about("Highlight a file") + .arg(Arg::with_name("paths-file").long("paths").takes_value(true)) .arg( - Arg::with_name("path") + Arg::with_name("paths") .index(1) .multiple(true) - .required(true), + .required(false), ) .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("html").long("html").short("h")) .arg(Arg::with_name("time").long("time").short("t")) - .arg(Arg::with_name("q").short("q")), + .arg(Arg::with_name("quiet").long("quiet").short("q")), ) .subcommand( SubCommand::with_name("build-wasm") @@ -149,7 +150,14 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("path").index(1).multiple(true)), ) .subcommand( - SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"), + SubCommand::with_name("web-ui") + .about("Test a parser interactively in the browser") + .arg( + Arg::with_name("quiet") + .long("quiet") + .short("q") + .help("open in default browser"), + ), ) .subcommand( SubCommand::with_name("dump-languages") @@ -187,6 +195,7 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); + let update = matches.is_present("update"); let filter = matches.value_of("filter"); let languages = loader.languages_at_path(¤t_dir)?; let language = languages @@ -200,7 +209,7 @@ fn run() -> error::Result<()> { test_corpus_dir = current_dir.join("corpus"); } if 
test_corpus_dir.is_dir() { - test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter)?; + test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?; } // Check that all of the queries are valid. @@ -214,24 +223,33 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); + let debug_xml = matches.is_present("debug-xml"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); let edits = matches .values_of("edits") .map_or(Vec::new(), |e| e.collect()); - let allow_cancellation = matches.is_present("allow-cancellation"); + let cancellation_flag = util::cancel_on_stdin(); + let timeout = matches .value_of("timeout") .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); - let paths = collect_paths(matches.values_of("path").unwrap())?; + + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap(); let mut has_error = false; loader.find_all_languages(&config.parser_directories)?; + + let should_track_stats = matches.is_present("stat"); + let mut stats = parse::Stats::default(); + for path in paths { let path = Path::new(&path); let language = select_language(&mut loader, path, ¤t_dir, matches.value_of("scope"))?; - has_error |= parse::parse_file_at_path( + + let this_file_errored = parse::parse_file_at_path( language, path, &edits, @@ -241,44 +259,76 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, - allow_cancellation, + debug_xml, + Some(&cancellation_flag), )?; + + if should_track_stats { + stats.total_parses += 1; + if !this_file_errored { + stats.successful_parses += 1; + } + } + + has_error |= this_file_errored; } + + if should_track_stats { + println!("{}", stats) + } + if has_error { return Error::err(String::new()); } } else if let Some(matches) = matches.subcommand_matches("query") { let ordered_captures = matches.values_of("captures").is_some(); - let paths = matches - .values_of("path") - .unwrap() - .into_iter() - .map(Path::new) - .collect::>(); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; loader.find_all_languages(&config.parser_directories)?; let language = select_language( &mut loader, - paths[0], + Path::new(&paths[0]), ¤t_dir, matches.value_of("scope"), )?; let query_path = Path::new(matches.value_of("query-path").unwrap()); - query::query_files_at_paths(language, paths, query_path, ordered_captures)?; + let range = matches.value_of("byte-range").map(|br| { + let r: Vec<&str> = br.split(":").collect(); + (r[0].parse().unwrap(), r[1].parse().unwrap()) + }); + let should_test = matches.is_present("test"); + query::query_files_at_paths( + language, + paths, + query_path, + ordered_captures, + range, + should_test, + )?; } else if let Some(matches) = matches.subcommand_matches("tags") { loader.find_all_languages(&config.parser_directories)?; - let paths = collect_paths(matches.values_of("inputs").unwrap())?; - tags::generate_tags(&loader, matches.value_of("scope"), &paths)?; + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + tags::generate_tags( + &loader, + matches.value_of("scope"), + &paths, + matches.is_present("quiet"), + matches.is_present("time"), + )?; } else if let Some(matches) = matches.subcommand_matches("highlight") { 
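The --byte-range value handled above is split on ":" and parsed with unwraps, so a malformed argument panics the CLI; the resulting pair feeds query_cursor.set_byte_range, which restricts query execution to that byte window. A hedged sketch of the same parse written fallibly (the helper name is hypothetical, not part of this diff):

    // Hypothetical fallible variant of the `--byte-range start:end` parse.
    fn parse_byte_range(arg: &str) -> Result<(usize, usize), String> {
        let err = || format!("Invalid byte range: {:?}", arg);
        let mut parts = arg.splitn(2, ':');
        let start = parts.next().and_then(|s| s.parse().ok()).ok_or_else(err)?;
        let end = parts.next().and_then(|s| s.parse().ok()).ok_or_else(err)?;
        Ok((start, end))
    }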
loader.configure_highlights(&config.theme.highlight_names); loader.find_all_languages(&config.parser_directories)?; let time = matches.is_present("time"); - let paths = collect_paths(matches.values_of("path").unwrap())?; - let html_mode = matches.is_present("html"); - if html_mode { + let quiet = matches.is_present("quiet"); + let html_mode = quiet || matches.is_present("html"); + let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + + if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } + let cancellation_flag = util::cancel_on_stdin(); + let mut lang = None; if let Some(scope) = matches.value_of("scope") { lang = loader.language_configuration_for_scope(scope)?; @@ -303,23 +353,38 @@ fn run() -> error::Result<()> { if let Some(highlight_config) = language_config.highlight_config(language)? { let source = fs::read(path)?; if html_mode { - highlight::html(&loader, &config.theme, &source, highlight_config, time)?; + highlight::html( + &loader, + &config.theme, + &source, + highlight_config, + quiet, + time, + )?; } else { - highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?; + highlight::ansi( + &loader, + &config.theme, + &source, + highlight_config, + time, + Some(&cancellation_flag), + )?; } } else { eprintln!("No syntax highlighting config found for path {:?}", path); } } - if html_mode { + if html_mode && !quiet { println!("{}", highlight::HTML_FOOTER); } } else if let Some(matches) = matches.subcommand_matches("build-wasm") { let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; - } else if matches.subcommand_matches("web-ui").is_some() { - web_ui::serve(¤t_dir); + } else if let Some(matches) = matches.subcommand_matches("web-ui") { + let open_in_browser = !matches.is_present("quiet"); + web_ui::serve(¤t_dir, open_in_browser); } else if matches.subcommand_matches("dump-languages").is_some() { loader.find_all_languages(&config.parser_directories)?; for (configuration, language_path) in loader.get_all_language_configurations() { @@ -345,39 +410,64 @@ fn run() -> error::Result<()> { Ok(()) } -fn collect_paths<'a>(paths: impl Iterator) -> error::Result> { - let mut result = Vec::new(); +fn collect_paths<'a>( + paths_file: Option<&str>, + paths: Option>, +) -> error::Result> { + if let Some(paths_file) = paths_file { + return Ok(fs::read_to_string(paths_file) + .map_err(Error::wrap(|| { + format!("Failed to read paths file {}", paths_file) + }))? 
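collect_paths, whose rewritten body continues below, now resolves inputs in three stages: a --paths file is read and split on ASCII whitespace; otherwise positional arguments are taken in order, with a leading "!" removing a previously added path and any argument that doesn't exist on disk treated as a glob pattern; and an empty result becomes an error instead of silently processing nothing. The add/remove bookkeeping reduces to a helper like this (a sketch; the real code uses a closure over the result vector):

    // A positive entry appends; a negated (`!`-prefixed) entry removes a
    // previously added occurrence of the same path.
    fn incorporate_path(result: &mut Vec<String>, path: &str, positive: bool) {
        if positive {
            result.push(path.to_string());
        } else if let Some(index) = result.iter().position(|p| p == path) {
            result.remove(index);
        }
    }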
+ .trim() + .split_ascii_whitespace() + .map(String::from) + .collect::>()); + } - let mut incorporate_path = |path: &str, positive| { - if positive { - result.push(path.to_string()); - } else { - if let Some(index) = result.iter().position(|p| p == path) { - result.remove(index); + if let Some(paths) = paths { + let mut result = Vec::new(); + + let mut incorporate_path = |path: &str, positive| { + if positive { + result.push(path.to_string()); + } else { + if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); + } } - } - }; + }; - for mut path in paths { - let mut positive = true; - if path.starts_with("!") { - positive = false; - path = path.trim_start_matches("!"); - } + for mut path in paths { + let mut positive = true; + if path.starts_with("!") { + positive = false; + path = path.trim_start_matches("!"); + } - if Path::new(path).exists() { - incorporate_path(path, positive); - } else { - let paths = - glob(path).map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; - for path in paths { - if let Some(path) = path?.to_str() { - incorporate_path(path, positive); + if Path::new(path).exists() { + incorporate_path(path, positive); + } else { + let paths = glob(path) + .map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?; + for path in paths { + if let Some(path) = path?.to_str() { + incorporate_path(path, positive); + } } } } + + if result.is_empty() { + Error::err( + "No files were found at or matched by the provided pathname/glob".to_string(), + )?; + } + + return Ok(result); } - Ok(result) + + Err(Error::new("Must provide one or more paths".to_string())) } fn select_language( diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d1ddb49924..5266b19f56 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,9 +2,9 @@ use super::error::{Error, Result}; use super::util; use std::io::{self, Write}; use std::path::Path; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; use std::time::Instant; -use std::{fs, thread, usize}; +use std::{fmt, fs, usize}; use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] @@ -14,6 +14,22 @@ pub struct Edit { pub inserted_text: Vec, } +#[derive(Debug, Default)] +pub struct Stats { + pub successful_parses: usize, + pub total_parses: usize, +} + +impl fmt::Display for Stats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + } +} + pub fn parse_file_at_path( language: Language, path: &Path, @@ -24,7 +40,8 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, - allow_cancellation: bool, + debug_xml: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; let mut parser = Parser::new(); @@ -35,16 +52,7 @@ pub fn parse_file_at_path( // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. 
- if allow_cancellation { - let flag = Box::new(AtomicUsize::new(0)); - unsafe { parser.set_cancellation_flag(Some(&flag)) }; - thread::spawn(move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - eprintln!("Cancelling"); - flag.store(1, Ordering::Relaxed); - }); - } + unsafe { parser.set_cancellation_flag(cancellation_flag) }; // Set a timeout based on the `--time` flag. parser.set_timeout_micros(timeout); @@ -70,10 +78,18 @@ pub fn parse_file_at_path( let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - for edit in edits { + if debug_graph && !edits.is_empty() { + println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); + } + + for (i, edit) in edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; perform_edit(&mut tree, &mut source_code, &edit); tree = parser.parse(&source_code, Some(&tree)).unwrap(); + + if debug_graph { + println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + } } let duration = time.elapsed(); @@ -136,6 +152,60 @@ pub fn parse_file_at_path( println!(""); } + if debug_xml { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + let mut tags: Vec<&str> = Vec::new(); + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + let tag = tags.pop(); + write!(&mut stdout, "\n", tag.expect("there is a tag"))?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + write!(&mut stdout, "<{}", node.kind())?; + if let Some(field_name) = cursor.field_name() { + write!(&mut stdout, " type=\"{}\"", field_name)?; + } + write!(&mut stdout, ">")?; + tags.push(node.kind()); + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + let start = node.start_byte(); + let end = node.end_byte(); + let value = std::str::from_utf8(&source_code[start..end]).expect("has a string"); + write!(&mut stdout, "{}", html_escape::encode_text(value))?; + } + } + } + cursor.reset(tree.root_node()); + println!(""); + } + let mut first_error = None; loop { let node = cursor.node(); diff --git a/cli/src/query.rs b/cli/src/query.rs index 4724227336..485fdb82c3 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -1,4 +1,5 @@ use super::error::{Error, Result}; +use crate::query_testing; use std::fs; use std::io::{self, Write}; use std::path::Path; @@ -6,9 +7,11 @@ use tree_sitter::{Language, Node, Parser, Query, QueryCursor}; pub fn query_files_at_paths( language: Language, - paths: Vec<&Path>, + paths: Vec, query_path: &Path, ordered_captures: bool, + range: Option<(usize, usize)>, + should_test: bool, ) -> Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); @@ -20,14 +23,19 @@ pub fn query_files_at_paths( .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?; let mut query_cursor = QueryCursor::new(); + if let Some((beg, end)) = range { + query_cursor.set_byte_range(beg, end); + } let mut parser = Parser::new(); parser.set_language(language).map_err(|e| e.to_string())?; for path in paths { - writeln!(&mut stdout, "{}", path.to_str().unwrap())?; + let mut results = Vec::new(); + + writeln!(&mut stdout, "{}", 
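The inline cancellation block removed above is replaced by util::cancel_on_stdin(), whose body lies outside this diff; judging from the removed code and from how the result is passed as Some(&cancellation_flag), it plausibly hands back a shared atomic flag that a background thread sets once a line arrives on stdin. A sketch under those assumptions:

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    // Plausible shape of util::cancel_on_stdin (not shown in this diff).
    pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
        let flag = Arc::new(AtomicUsize::new(0));
        let thread_flag = flag.clone();
        std::thread::spawn(move || {
            let mut line = String::new();
            // Any line on stdin flips the flag; the parser polls it via
            // set_cancellation_flag and aborts when it becomes nonzero.
            if std::io::stdin().read_line(&mut line).is_ok() {
                thread_flag.store(1, Ordering::Relaxed);
            }
        });
        flag
    }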
path)?; - let source_code = fs::read(path).map_err(Error::wrap(|| { + let source_code = fs::read(&path).map_err(Error::wrap(|| { format!("Error reading source file {:?}", path) }))?; let text_callback = |n: Node| &source_code[n.byte_range()]; @@ -38,14 +46,20 @@ pub fn query_files_at_paths( query_cursor.captures(&query, tree.root_node(), text_callback) { let capture = mat.captures[capture_index]; + let capture_name = &query.capture_names()[capture.index as usize]; writeln!( &mut stdout, " pattern: {}, capture: {}, row: {}, text: {:?}", mat.pattern_index, - &query.capture_names()[capture.index as usize], + capture_name, capture.node.start_position().row, capture.node.utf8_text(&source_code).unwrap_or("") )?; + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); } } else { for m in query_cursor.matches(&query, tree.root_node(), text_callback) { @@ -53,11 +67,12 @@ pub fn query_files_at_paths( for capture in m.captures { let start = capture.node.start_position(); let end = capture.node.end_position(); + let capture_name = &query.capture_names()[capture.index as usize]; if end.row == start.row { writeln!( &mut stdout, " capture: {}, start: {}, text: {:?}", - &query.capture_names()[capture.index as usize], + capture_name, start, capture.node.utf8_text(&source_code).unwrap_or("") )?; @@ -65,14 +80,20 @@ pub fn query_files_at_paths( writeln!( &mut stdout, " capture: {}, start: {}, end: {}", - &query.capture_names()[capture.index as usize], - start, - end, + capture_name, start, end, )?; } + results.push(query_testing::CaptureInfo { + name: capture_name.to_string(), + start: capture.node.start_position(), + end: capture.node.end_position(), + }); } } } + if should_test { + query_testing::assert_expected_captures(results, path, &mut parser, language)? + } } Ok(()) diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs new file mode 100644 index 0000000000..ef02ec69e2 --- /dev/null +++ b/cli/src/query_testing.rs @@ -0,0 +1,150 @@ +use crate::error; +use crate::error::Result; +use lazy_static::lazy_static; +use regex::Regex; +use std::fs; +use tree_sitter::{Language, Parser, Point}; + +lazy_static! { + static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); +} + +#[derive(Debug, Eq, PartialEq)] +pub struct CaptureInfo { + pub name: String, + pub start: Point, + pub end: Point, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Assertion { + pub position: Point, + pub expected_capture_name: String, +} + +/// Parse the given source code, finding all of the comments that contain +/// highlighting assertions. Return a vector of (position, expected highlight name) +/// pairs. +pub fn parse_position_comments( + parser: &mut Parser, + language: Language, + source: &[u8], +) -> Result> { + let mut result = Vec::new(); + let mut assertion_ranges = Vec::new(); + + // Parse the code. + parser.set_included_ranges(&[]).unwrap(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + // Walk the tree, finding comment nodes that contain assertions. + let mut ascending = false; + let mut cursor = tree.root_node().walk(); + loop { + if ascending { + let node = cursor.node(); + + // Find every comment node. + if node.kind().contains("comment") { + if let Ok(text) = node.utf8_text(source) { + let mut position = node.start_position(); + if position.row == 0 { + continue; + } + + // Find the arrow character ("^" or '<-") in the comment. 
A left arrow + // refers to the column where the comment node starts. An up arrow refers + // to its own column. + let mut has_left_caret = false; + let mut has_arrow = false; + let mut arrow_end = 0; + for (i, c) in text.char_indices() { + arrow_end = i + 1; + if c == '-' && has_left_caret { + has_arrow = true; + break; + } + if c == '^' { + has_arrow = true; + position.column += i; + break; + } + has_left_caret = c == '<'; + } + + // If the comment node contains an arrow and a highlight name, record the + // highlight name and the position. + if let (true, Some(mat)) = + (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..])) + { + assertion_ranges.push((node.start_position(), node.end_position())); + result.push(Assertion { + position: position, + expected_capture_name: mat.as_str().to_string(), + }); + } + } + } + + // Continue walking the tree. + if cursor.goto_next_sibling() { + ascending = false; + } else if !cursor.goto_parent() { + break; + } + } else if !cursor.goto_first_child() { + ascending = true; + } + } + + // Adjust the row number in each assertion's position to refer to the line of + // code *above* the assertion. There can be multiple lines of assertion comments, + // so the positions may have to be decremented by more than one row. + let mut i = 0; + for assertion in result.iter_mut() { + loop { + let on_assertion_line = assertion_ranges[i..] + .iter() + .any(|(start, _)| start.row == assertion.position.row); + if on_assertion_line { + assertion.position.row -= 1; + } else { + while i < assertion_ranges.len() + && assertion_ranges[i].0.row < assertion.position.row + { + i += 1; + } + break; + } + } + } + + // The assertions can end up out of order due to the line adjustments. + result.sort_unstable_by_key(|a| a.position); + + Ok(result) +} + +pub fn assert_expected_captures( + infos: Vec, + path: String, + parser: &mut Parser, + language: Language, +) -> Result<()> { + let contents = fs::read_to_string(path)?; + let pairs = parse_position_comments(parser, language, contents.as_bytes())?; + for info in &infos { + if let Some(found) = pairs.iter().find(|p| { + p.position.row == info.start.row && p.position >= info.start && p.position < info.end + }) { + if found.expected_capture_name != info.name && info.name != "name" { + Err(error::Error::new(format!( + "Assertion failed: at {}, found {}, expected {}", + info.start, found.expected_capture_name, info.name + )))? + } + } + } + Ok(()) +} diff --git a/cli/src/tags.rs b/cli/src/tags.rs index d6704ec52a..802d8d0654 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -3,10 +3,17 @@ use super::util; use crate::error::{Error, Result}; use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use std::{fs, str}; use tree_sitter_tags::TagsContext; -pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> Result<()> { +pub fn generate_tags( + loader: &Loader, + scope: Option<&str>, + paths: &[String], + quiet: bool, + time: bool, +) -> Result<()> { let mut lang = None; if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; @@ -34,28 +41,53 @@ pub fn generate_tags(loader: &Loader, scope: Option<&str>, paths: &[String]) -> }; if let Some(tags_config) = language_config.tags_config(language)? 
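For reference, the arrow conventions parse_position_comments recognizes: "<-" asserts about the column where the comment node itself starts, "^" asserts about the caret's own column, and assertion rows are then walked upward past consecutive assertion comments so each one targets the nearest line of real code above. In Rust source the assertions would look roughly like this (the capture names are hypothetical and depend on the grammar's query files):

    fn main() {}
    // <- keyword.function
    //  ^ function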
{ - let path_str = format!("{:?}", path); - writeln!(&mut stdout, "{}", &path_str[1..path_str.len() - 1])?; + let indent; + if paths.len() > 1 { + if !quiet { + writeln!(&mut stdout, "{}", path.to_string_lossy())?; + } + indent = "\t" + } else { + indent = ""; + }; let source = fs::read(path)?; - for tag in context.generate_tags(tags_config, &source, Some(&cancellation_flag))? { + let t0 = Instant::now(); + for tag in context + .generate_tags(tags_config, &source, Some(&cancellation_flag))? + .0 + { let tag = tag?; - write!( - &mut stdout, - " {:<8} {:<40}\t{:>9}-{:<9}", - tag.kind, - str::from_utf8(&source[tag.name_range]).unwrap_or(""), - tag.span.start, - tag.span.end, - )?; - if let Some(docs) = tag.docs { - if docs.len() > 120 { - write!(&mut stdout, "\t{:?}...", &docs[0..120])?; - } else { - write!(&mut stdout, "\t{:?}", &docs)?; + if !quiet { + write!( + &mut stdout, + "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", + indent, + str::from_utf8(&source[tag.name_range]).unwrap_or(""), + &tags_config.syntax_type_name(tag.syntax_type_id), + if tag.is_definition { "def" } else { "ref" }, + tag.span.start, + tag.span.end, + str::from_utf8(&source[tag.line_range]).unwrap_or(""), + )?; + if let Some(docs) = tag.docs { + if docs.len() > 120 { + write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?; + } else { + write!(&mut stdout, "\t{:?}", &docs)?; + } } + writeln!(&mut stdout, "")?; } - writeln!(&mut stdout, "")?; + } + + if time { + writeln!( + &mut stdout, + "{}time: {}ms", + indent, + t0.elapsed().as_millis(), + )?; } } else { eprintln!("No tags config found for path {:?}", path); diff --git a/cli/src/test.rs b/cli/src/test.rs index 1806c15020..c8cfe89f30 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -6,9 +6,10 @@ use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; +use std::fmt::Write as FmtWrite; use std::fs; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str; use tree_sitter::{Language, LogType, Parser, Query}; @@ -30,6 +31,7 @@ pub enum TestEntry { Group { name: String, children: Vec, + file_path: Option, }, Example { name: String, @@ -44,6 +46,7 @@ impl Default for TestEntry { TestEntry::Group { name: String::new(), children: Vec::new(), + file_path: None, } } } @@ -54,6 +57,7 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, + update: bool, ) -> Result<()> { let test_entry = parse_tests(path)?; let mut _log_session = None; @@ -72,27 +76,45 @@ pub fn run_tests_at_path( } let mut failures = Vec::new(); - if let TestEntry::Group { children, .. } = test_entry { - for child in children { - run_tests(&mut parser, child, filter, 0, &mut failures)?; - } - } + let mut corrected_entries = Vec::new(); + run_tests( + &mut parser, + test_entry, + filter, + 0, + &mut failures, + update, + &mut corrected_entries, + )?; if failures.len() > 0 { println!(""); - if failures.len() == 1 { - println!("1 failure:") + if update { + if failures.len() == 1 { + println!("1 update:\n") + } else { + println!("{} updates:\n", failures.len()) + } + + for (i, (name, ..)) in failures.iter().enumerate() { + println!(" {}. {}", i + 1, name); + } + Ok(()) } else { - println!("{} failures:", failures.len()) - } + if failures.len() == 1 { + println!("1 failure:") + } else { + println!("{} failures:", failures.len()) + } - print_diff_key(); - for (i, (name, actual, expected)) in failures.iter().enumerate() { - println!("\n {}. 
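Two details of the tags rewrite above: generate_tags now returns a tuple whose first element is the iterator of tags (hence the .0), and each tag exposes syntax_type_id, is_definition, and line_range alongside name_range and span. Reduced to a hedged sketch, the consumption pattern is (setup assumed as in the surrounding code; the tuple's second element is left unnamed since this hunk doesn't use it):

    let (tags, _) = context.generate_tags(tags_config, &source, None)?;
    for tag in tags {
        let tag = tag?;
        // Slice the tag's name out of the source and resolve its syntax
        // type through the tags configuration.
        let name = std::str::from_utf8(&source[tag.name_range]).unwrap_or("");
        let kind = tags_config.syntax_type_name(tag.syntax_type_id);
        let marker = if tag.is_definition { "def" } else { "ref" };
        println!("{:<10} | {:<8} {} {} - {}", name, kind, marker, tag.span.start, tag.span.end);
    }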
{}:", i + 1, name); - print_diff(actual, expected); + print_diff_key(); + for (i, (name, actual, expected)) in failures.iter().enumerate() { + println!("\n {}. {}:", i + 1, name); + print_diff(actual, expected); + } + Error::err(String::new()) } - Error::err(String::new()) } else { Ok(()) } @@ -102,14 +124,14 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { if path.exists() { for entry in fs::read_dir(path)? { let entry = entry?; - let hidden = entry.file_name().to_str().unwrap_or("").starts_with("."); + let filepath = entry.file_name(); + let filepath = filepath.to_str().unwrap_or(""); + let hidden = filepath.starts_with("."); if !hidden { let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| { format!("Error reading query file {:?}", entry.file_name()) }))?; - Query::new(language, &content).map_err(Error::wrap(|| { - format!("Error in query file {:?}", entry.file_name()) - }))?; + Query::new(language, &content).map_err(|e| (filepath, e))?; } } } @@ -149,6 +171,8 @@ fn run_tests( filter: Option<&str>, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, + update: bool, + corrected_entries: &mut Vec<(String, String, String)>, ) -> Result<()> { match test_entry { TestEntry::Example { @@ -159,6 +183,11 @@ fn run_tests( } => { if let Some(filter) = filter { if !name.contains(filter) { + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } return Ok(()); } } @@ -172,25 +201,138 @@ fn run_tests( } if actual == output { println!("āœ“ {}", Colour::Green.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } } else { - println!("āœ— {}", Colour::Red.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&actual); + corrected_entries.push((name.clone(), input, output)); + println!("āœ“ {}", Colour::Blue.paint(&name)); + } else { + println!("āœ— {}", Colour::Red.paint(&name)); + } failures.push((name, actual, output)); } } - TestEntry::Group { name, children } => { - for _ in 0..indent_level { - print!(" "); + TestEntry::Group { + name, + children, + file_path, + } => { + if indent_level > 0 { + for _ in 0..indent_level { + print!(" "); + } + println!("{}:", name); } - println!("{}:", name); + + let failure_count = failures.len(); + indent_level += 1; for child in children { - run_tests(parser, child, filter, indent_level, failures)?; + run_tests( + parser, + child, + filter, + indent_level, + failures, + update, + corrected_entries, + )?; + } + + if let Some(file_path) = file_path { + if update && failures.len() - failure_count > 0 { + write_tests(&file_path, corrected_entries)?; + } + corrected_entries.clear(); } } } Ok(()) } +fn format_sexp(sexp: &String) -> String { + let mut formatted = String::new(); + + let mut indent_level = 0; + let mut has_field = false; + let mut s_iter = sexp.split(|c| c == ' ' || c == ')'); + while let Some(s) = s_iter.next() { + if s.is_empty() { + // ")" + indent_level -= 1; + write!(formatted, ")").unwrap(); + } else if s.starts_with('(') { + if has_field { + has_field = false; + } else { + if indent_level > 0 { + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + } + indent_level += 1; + } + + // "(node_name" + write!(formatted, "{}", s).unwrap(); + + let mut c_iter = s.chars(); + c_iter.next(); 
+ let second_char = c_iter.next().unwrap(); + if second_char == 'M' || second_char == 'U' { + // "(MISSING node_name" or "(UNEXPECTED 'x'" + let s = s_iter.next().unwrap(); + write!(formatted, " {}", s).unwrap(); + } + } else if s.ends_with(':') { + // "field:" + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + write!(formatted, "{} ", s).unwrap(); + has_field = true; + indent_level += 1; + } + } + + formatted +} + +fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { + let mut buffer = fs::File::create(file_path)?; + write_tests_to_buffer(&mut buffer, corrected_entries) +} + +fn write_tests_to_buffer( + buffer: &mut impl Write, + corrected_entries: &Vec<(String, String, String)>, +) -> Result<()> { + for (i, (name, input, output)) in corrected_entries.iter().enumerate() { + if i > 0 { + write!(buffer, "\n")?; + } + write!( + buffer, + "{}\n{}\n{}\n{}\n{}\n\n{}\n", + "=".repeat(80), + name, + "=".repeat(80), + input, + "-".repeat(80), + output.trim() + )?; + } + Ok(()) +} + pub fn parse_tests(path: &Path) -> io::Result { let name = path .file_stem() @@ -206,10 +348,14 @@ pub fn parse_tests(path: &Path) -> io::Result { children.push(parse_tests(&entry.path())?); } } - Ok(TestEntry::Group { name, children }) + Ok(TestEntry::Group { + name, + children, + file_path: None, + }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content)) + Ok(parse_test_content(name, content, Some(path.to_path_buf()))) } } @@ -217,7 +363,7 @@ pub fn strip_sexp_fields(sexp: String) -> String { SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() } -fn parse_test_content(name: String, content: String) -> TestEntry { +fn parse_test_content(name: String, content: String, file_path: Option) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut prev_name = String::new(); @@ -268,7 +414,11 @@ fn parse_test_content(name: String, content: String) -> TestEntry { .to_string(); prev_header_end = header_end; } - TestEntry::Group { name, children } + TestEntry::Group { + name, + children, + file_path, + } } #[cfg(test)] @@ -300,6 +450,7 @@ d "# .trim() .to_string(), + None, ); assert_eq!( @@ -319,7 +470,8 @@ d output: "(d)".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } @@ -352,6 +504,7 @@ abc "# .trim() .to_string(), + None, ); assert_eq!( @@ -371,8 +524,67 @@ abc output: "(c (d))".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } + + #[test] + fn test_format_sexp() { + assert_eq!( + format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()), + r#" +(a + b: (c) + (d) + e: (f + (g + (h + (MISSING i))))) +"# + .trim() + .to_string() + ); + } + + #[test] + fn test_write_tests_to_buffer() { + let mut buffer = Vec::new(); + let corrected_entries = vec![ + ( + "title 1".to_string(), + "input 1".to_string(), + "output 1".to_string(), + ), + ( + "title 2".to_string(), + "input 2".to_string(), + "output 2".to_string(), + ), + ]; + write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); + assert_eq!( + String::from_utf8(buffer).unwrap(), + r#" +================================================================================ +title 1 +================================================================================ +input 1 +-------------------------------------------------------------------------------- + +output 1 + +================================================================================ +title 2 
+================================================================================ +input 2 +-------------------------------------------------------------------------------- + +output 2 +"# + .trim_start() + .to_string() + ); + } } diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 2011af4091..df870bf6d3 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -1,17 +1,12 @@ use super::error::Result; use crate::loader::Loader; +use crate::query_testing::{parse_position_comments, Assertion}; use ansi_term::Colour; -use lazy_static::lazy_static; -use regex::Regex; use std::fs; use std::path::Path; -use tree_sitter::{Language, Parser, Point}; +use tree_sitter::Point; use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; -lazy_static! { - static ref HIGHLIGHT_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); -} - pub struct Failure { row: usize, column: usize, @@ -86,23 +81,20 @@ pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { Ok(()) } } - -pub fn test_highlight( - loader: &Loader, - highlighter: &mut Highlighter, - highlight_config: &HighlightConfiguration, - source: &[u8], +pub fn iterate_assertions( + assertions: &Vec, + highlights: &Vec<(Point, Point, Highlight)>, + highlight_names: &Vec, ) -> Result { - // Highlight the file, and parse out all of the highlighting assertions. - let highlight_names = loader.highlight_names(); - let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; - let assertions = parse_highlight_test(highlighter.parser(), highlight_config.language, source)?; - // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. let mut i = 0; let mut actual_highlights = Vec::<&String>::new(); - for (position, expected_highlight) in &assertions { + for Assertion { + position, + expected_capture_name: expected_highlight, + } in assertions + { let mut passed = false; actual_highlights.clear(); @@ -156,102 +148,80 @@ pub fn test_highlight( Ok(assertions.len()) } -/// Parse the given source code, finding all of the comments that contain -/// highlighting assertions. Return a vector of (position, expected highlight name) -/// pairs. -pub fn parse_highlight_test( - parser: &mut Parser, - language: Language, +pub fn test_highlight( + loader: &Loader, + highlighter: &mut Highlighter, + highlight_config: &HighlightConfiguration, source: &[u8], -) -> Result> { - let mut result = Vec::new(); - let mut assertion_ranges = Vec::new(); +) -> Result { + // Highlight the file, and parse out all of the highlighting assertions. + let highlight_names = loader.highlight_names(); + let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; + let assertions = + parse_position_comments(highlighter.parser(), highlight_config.language, source)?; - // Parse the code. - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); + iterate_assertions(&assertions, &highlights, &highlight_names)?; - // Walk the tree, finding comment nodes that contain assertions. - let mut ascending = false; - let mut cursor = tree.root_node().walk(); - loop { - if ascending { - let node = cursor.node(); + // Iterate through all of the highlighting assertions, checking each one against the + // actual highlights. 
+ let mut i = 0; + let mut actual_highlights = Vec::<&String>::new(); + for Assertion { + position, + expected_capture_name: expected_highlight, + } in &assertions + { + let mut passed = false; + actual_highlights.clear(); - // Find every comment node. - if node.kind().contains("comment") { - if let Ok(text) = node.utf8_text(source) { - let mut position = node.start_position(); - if position.row == 0 { - continue; - } + 'highlight_loop: loop { + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. + if let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } - // Find the arrow character ("^" or '<-") in the comment. A left arrow - // refers to the column where the comment node starts. An up arrow refers - // to its own column. - let mut has_left_caret = false; - let mut has_arrow = false; - let mut arrow_end = 0; - for (i, c) in text.char_indices() { - arrow_end = i + 1; - if c == '-' && has_left_caret { - has_arrow = true; - break; - } - if c == '^' { - has_arrow = true; - position.column += i; - break; - } - has_left_caret = c == '<'; + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. + let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; } - // If the comment node contains an arrow and a highlight name, record the - // highlight name and the position. - if let (true, Some(mat)) = - (has_arrow, HIGHLIGHT_NAME_REGEX.find(&text[arrow_end..])) - { - assertion_ranges.push((node.start_position(), node.end_position())); - result.push((position, mat.as_str().to_string())); + // If the highlight matches the assertion, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if *highlight_name == *expected_highlight { + passed = true; + break 'highlight_loop; + } else { + actual_highlights.push(highlight_name); } - } - } - // Continue walking the tree. - if cursor.goto_next_sibling() { - ascending = false; - } else if !cursor.goto_parent() { + j += 1; + } + } else { break; } - } else if !cursor.goto_first_child() { - ascending = true; } - } - // Adjust the row number in each assertion's position to refer to the line of - // code *above* the assertion. There can be multiple lines of assertion comments, - // so the positions may have to be decremented by more than one row. - let mut i = 0; - for (position, _) in result.iter_mut() { - loop { - let on_assertion_line = assertion_ranges[i..] - .iter() - .any(|(start, _)| start.row == position.row); - if on_assertion_line { - position.row -= 1; - } else { - while i < assertion_ranges.len() && assertion_ranges[i].0.row < position.row { - i += 1; - } - break; + if !passed { + return Err(Failure { + row: position.row, + column: position.column, + expected_highlight: expected_highlight.clone(), + actual_highlights: actual_highlights.into_iter().cloned().collect(), } + .into()); } } - // The assertions can end up out of order due to the line adjustments. 
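The matching loop above (and its twin in iterate_assertions) leans on both sequences being sorted by position: a cursor i skips highlights that end at or before the assertion's position, then a scan from i collects the names of highlights spanning that position until one matches the expected name. Flattened to one dimension, the scan is (a simplified sketch; the real code compares tree_sitter::Point values):

    // Simplified 1-D sketch of the ordered assertion/highlight scan.
    fn check_assertion(
        highlights: &[(usize, usize, String)], // (start, end, highlight name)
        i: &mut usize,
        position: usize,
        expected: &str,
    ) -> (bool, Vec<String>) {
        // Skip highlights that end at or before this assertion.
        while *i < highlights.len() && highlights[*i].1 <= position {
            *i += 1;
        }
        let mut actual = Vec::new();
        let mut j = *i;
        // Collect names of highlights that span the assertion's position.
        while j < highlights.len() && highlights[j].0 <= position {
            if highlights[j].2 == expected {
                return (true, actual);
            }
            actual.push(highlights[j].2.clone());
            j += 1;
        }
        (false, actual)
    }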
- result.sort_unstable_by_key(|a| a.0); - - Ok(result) + Ok(assertions.len()) } pub fn get_highlight_positions( diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index fd2ed90828..202dcd70d8 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -24,6 +24,8 @@ const LANGUAGES: &'static [&'static str] = &[ "json", "php", "python", + "ruby", + "rust", ]; lazy_static! { @@ -388,7 +390,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children } => { + TestEntry::Group { mut name, children, .. } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs index 1f7106dd19..6b09d64c50 100644 --- a/cli/src/tests/highlight_test.rs +++ b/cli/src/tests/highlight_test.rs @@ -588,7 +588,7 @@ fn test_highlighting_via_c_api() { #[test] fn test_decode_utf8_lossy() { - use tree_sitter_highlight::util::LossyUtf8; + use tree_sitter::LossyUtf8; let parts = LossyUtf8::new(b"hi").collect::>(); assert_eq!(parts, vec!["hi"]); diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index ac54db004d..24e8160efb 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -3,6 +3,7 @@ mod helpers; mod highlight_test; mod node_test; mod parser_test; +mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index d4a5a3f98a..7e652cd550 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -739,7 +739,7 @@ fn test_node_numeric_symbols_respect_simple_aliases() { let root = tree.root_node(); assert_eq!( root.to_sexp(), - "(program (binary left: (unary (identifier)) right: (identifier)))", + "(program (binary left: (unary operand: (identifier)) right: (identifier)))", ); let binary_node = root.child(0).unwrap(); diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 392d1a9ded..b2b2560e4d 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -395,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() { assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); } +#[test] +fn test_parsing_empty_file_with_reused_tree() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser.parse("", None); + parser.parse("", tree.as_ref()); + + let tree = parser.parse("\n ", None); + parser.parse("\n ", tree.as_ref()); +} + // Thread safety #[test] diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs new file mode 100644 index 0000000000..7ebd543911 --- /dev/null +++ b/cli/src/tests/pathological_test.rs @@ -0,0 +1,15 @@ +use super::helpers::allocations; +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_pathological_example_1() { + let language = "cpp"; + let source = r#"*ss(qqX = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok(); +} + #[test] fn test_query_errors_on_invalid_syntax() { allocations::record(|| { let language = get_language("javascript"); assert!(Query::new(language, "(if_statement)").is_ok()); - assert!(Query::new(language, "(if_statement condition:(identifier))").is_ok()); + assert!(Query::new( + language, + "(if_statement condition:(parenthesized_expression (identifier)))" + ) + .is_ok()); // Mismatched parens assert_eq!( - Query::new(language, "(if_statement"), - 
Err(QueryError::Syntax( - 1, - [ - "(if_statement", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement").unwrap_err().message, + [ + "(if_statement", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, "; comment 1\n; comment 2\n (if_statement))"), - Err(QueryError::Syntax( - 3, - [ - " (if_statement))", // - " ^", - ] - .join("\n") - )) + Query::new(language, "; comment 1\n; comment 2\n (if_statement))") + .unwrap_err() + .message, + [ + " (if_statement))", // + " ^", + ] + .join("\n") ); // Return an error at the *beginning* of a bare identifier not followed a colon. // If there's a colon but no pattern, return an error at the end of the colon. assert_eq!( - Query::new(language, "(if_statement identifier)"), - Err(QueryError::Syntax( - 1, - [ - "(if_statement identifier)", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement identifier)") + .unwrap_err() + .message, + [ + "(if_statement identifier)", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, "(if_statement condition:)"), - Err(QueryError::Syntax( - 1, - [ - "(if_statement condition:)", // - " ^", - ] - .join("\n") - )) + Query::new(language, "(if_statement condition:)") + .unwrap_err() + .message, + [ + "(if_statement condition:)", // + " ^", + ] + .join("\n") ); // Return an error at the beginning of an unterminated string. assert_eq!( - Query::new(language, r#"(identifier) "h "#), - Err(QueryError::Syntax( - 1, - [ - r#"(identifier) "h "#, // - r#" ^"#, - ] - .join("\n") - )) + Query::new(language, r#"(identifier) "h "#) + .unwrap_err() + .message, + [ + r#"(identifier) "h "#, // + r#" ^"#, + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) ()"#), - Err(QueryError::Syntax( - 1, - [ - "((identifier) ()", // - " ^", - ] - .join("\n") - )) + Query::new(language, r#"((identifier) ()"#) + .unwrap_err() + .message, + [ + "((identifier) ()", // + " ^", + ] + .join("\n") ); assert_eq!( - Query::new(language, r#"((identifier) @x (eq? @x a"#), - Err(QueryError::Syntax( - 1, - [ - r#"((identifier) @x (eq? @x a"#, - r#" ^"#, - ] - .join("\n") - )) + Query::new(language, r#"((identifier) [])"#) + .unwrap_err() + .message, + [ + "((identifier) [])", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) (#a)"#) + .unwrap_err() + .message, + [ + "((identifier) (#a)", // + " ^", + ] + .join("\n") + ); + assert_eq!( + Query::new(language, r#"((identifier) @x (#eq? @x a"#) + .unwrap_err() + .message, + [ + r#"((identifier) @x (#eq? 
@x a"#, + r#" ^"#, + ] + .join("\n") ); }); } @@ -107,48 +128,265 @@ fn test_query_errors_on_invalid_symbols() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "(clas)"), - Err(QueryError::NodeType(1, "clas".to_string())) + Query::new(language, "(clas)").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } ); assert_eq!( - Query::new(language, "(if_statement (arrayyyyy))"), - Err(QueryError::NodeType(1, "arrayyyyy".to_string())) + Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + QueryError { + row: 0, + offset: 15, + column: 15, + kind: QueryErrorKind::NodeType, + message: "arrayyyyy".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement condition: (non_existent3))"), - Err(QueryError::NodeType(1, "non_existent3".to_string())) + Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + QueryError { + row: 0, + offset: 26, + column: 26, + kind: QueryErrorKind::NodeType, + message: "non_existent3".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement condit: (identifier))"), - Err(QueryError::Field(1, "condit".to_string())) + Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "condit".to_string() + }, ); assert_eq!( - Query::new(language, "(if_statement conditioning: (identifier))"), - Err(QueryError::Field(1, "conditioning".to_string())) + Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + QueryError { + row: 0, + offset: 14, + column: 14, + kind: QueryErrorKind::Field, + message: "conditioning".to_string() + } ); }); } #[test] -fn test_query_errors_on_invalid_conditions() { +fn test_query_errors_on_invalid_predicates() { allocations::record(|| { let language = get_language("javascript"); assert_eq!( - Query::new(language, "((identifier) @id (@id))"), - Err(QueryError::Predicate( - "Expected predicate to start with a function name. Got @id.".to_string() - )) + Query::new(language, "((identifier) @id (@id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Syntax, + row: 0, + column: 19, + offset: 19, + message: [ + "((identifier) @id (@id))", // + " ^" + ] + .join("\n") + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Predicate, + row: 0, + column: 0, + offset: 0, + message: "Wrong number of arguments to #eq? predicate. Expected 2, got 1." + .to_string() + } + ); + assert_eq!( + Query::new(language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), + QueryError { + kind: QueryErrorKind::Capture, + row: 0, + column: 29, + offset: 29, + message: "ok".to_string(), + } + ); + }); +} + +#[test] +fn test_query_errors_on_impossible_patterns() { + let js_lang = get_language("javascript"); + let rb_lang = get_language("ruby"); + + allocations::record(|| { + assert_eq!( + Query::new( + js_lang, + "(binary_expression left: (identifier) left: (identifier))" + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 38, + column: 38, + message: [ + "(binary_expression left: (identifier) left: (identifier))", + " ^" + ] + .join("\n"), + }) + ); + + Query::new( + js_lang, + "(function_declaration name: (identifier) (statement_block))", + ) + .unwrap(); + assert_eq!( + Query::new(js_lang, "(function_declaration name: (statement_block))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 22, + column: 22, + message: [ + "(function_declaration name: (statement_block))", + " ^", + ] + .join("\n") + }) + ); + + Query::new(rb_lang, "(call receiver:(call))").unwrap(); + assert_eq!( + Query::new(rb_lang, "(call receiver:(binary))"), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(call receiver:(binary))", // + " ^", + ] + .join("\n") + }) + ); + + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (identifier)) + (generator_function_declaration (identifier)) + ]", + ) + .unwrap(); + assert_eq!( + Query::new( + js_lang, + "[ + (function (identifier)) + (function_declaration (object)) + (generator_function_declaration (identifier)) + ]", + ), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 2, + offset: 88, + column: 42, + message: [ + " (function_declaration (object))", // + " ^", + ] + .join("\n") + }) + ); + + assert_eq!( + Query::new(js_lang, "(identifier (identifier))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 12, + column: 12, + message: [ + "(identifier (identifier))", // + " ^", + ] + .join("\n") + }) + ); + assert_eq!( + Query::new(js_lang, "(true (true))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 6, + column: 6, + message: [ + "(true (true))", // + " ^", + ] + .join("\n") + }) ); + + Query::new( + js_lang, + "(if_statement + condition: (parenthesized_expression (_expression) @cond))", + ) + .unwrap(); + assert_eq!( - Query::new(language, "((identifier) @id (eq? @id))"), - Err(QueryError::Predicate( - "Wrong number of arguments to eq? predicate. Expected 2, got 1.".to_string() - )) + Query::new(js_lang, "(if_statement condition: (_expression))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 14, + column: 14, + message: [ + "(if_statement condition: (_expression))", // + " ^", + ] + .join("\n") + }) ); + }); +} + +#[test] +fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { + allocations::record(|| { + let ruby = get_language("ruby"); + + Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + assert_eq!( - Query::new(language, "((identifier) @id (eq? 
@id @ok))"), - Err(QueryError::Capture(1, "ok".to_string())) + Query::new(ruby, "(destructured_parameter (string))",), + Err(QueryError { + kind: QueryErrorKind::Structure, + row: 0, + offset: 24, + column: 24, + message: [ + "(destructured_parameter (string))", // + " ^", + ] + .join("\n") + }) ); }); } @@ -163,19 +401,13 @@ fn test_query_matches_with_simple_pattern() { ) .unwrap(); - let source = "function one() { two(); function three() {} }"; - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "function one() { two(); function three() {} }", &[ (0, vec![("fn-name", "one")]), - (0, vec![("fn-name", "three")]) + (0, vec![("fn-name", "three")]), ], ); }); @@ -195,7 +427,10 @@ fn test_query_matches_with_multiple_on_same_root() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " class Person { // the constructor constructor(name) { this.name = name; } @@ -203,30 +438,21 @@ fn test_query_matches_with_multiple_on_same_root() { // the getter getFullName() { return this.name; } } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ ( 0, vec![ ("the-class-name", "Person"), - ("the-method-name", "constructor") - ] + ("the-method-name", "constructor"), + ], ), ( 0, vec![ ("the-class-name", "Person"), - ("the-method-name", "getFullName") - ] + ("the-method-name", "getFullName"), + ], ), ], ); @@ -246,20 +472,14 @@ fn test_query_matches_with_multiple_patterns_different_roots() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " function f1() { f2(f3()); } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (0, vec![("fn-def", "f1")]), (1, vec![("fn-ref", "f2")]), @@ -287,21 +507,15 @@ fn test_query_matches_with_multiple_patterns_same_root() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " a = { b: () => { return c; }, d: function() { return d; } }; - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ (1, vec![("method-def", "b")]), (0, vec![("method-def", "d")]), @@ -325,20 +539,14 @@ fn test_query_matches_with_nesting_and_no_fields() { ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " [[a]]; [[c, d], [e, f, g, h]]; [[h], [i]]; - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - 
collect_matches(matches, &query, source), + ", &[ (0, vec![("x1", "c"), ("x2", "d")]), (0, vec![("x1", "e"), ("x2", "f")]), @@ -353,22 +561,63 @@ fn test_query_matches_with_nesting_and_no_fields() { } #[test] -fn test_query_matches_with_many() { +fn test_query_matches_with_many_results() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new(language, "(array (identifier) @element)").unwrap(); - let source = "[hello];\n".repeat(50); + assert_query_matches( + language, + &query, + &"[hello];\n".repeat(50), + &vec![(0, vec![("element", "hello")]); 50], + ); + }); +} - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); +#[test] +fn test_query_matches_with_many_overlapping_results() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @method)) + (call_expression + function: (identifier) @function) + ((identifier) @constant + (#match? @constant "[A-Z\\d_]+")) + "#, + ) + .unwrap(); - assert_eq!( - collect_matches(matches, &query, source.as_str()), - vec![(0, vec![("element", "hello")]); 50], + let count = 1024; + + // Deeply nested chained function calls: + // a + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // .foo(bar(BAZ)) + // ... + let mut source = "a".to_string(); + source += &"\n .foo(bar(BAZ))".repeat(count); + + assert_query_matches( + language, + &query, + &source, + &[ + (0, vec![("method", "foo")]), + (1, vec![("function", "bar")]), + (2, vec![("constant", "BAZ")]), + ] + .iter() + .cloned() + .cycle() + .take(3 * count) + .collect::>(), ); }); } @@ -385,20 +634,11 @@ fn test_query_matches_capturing_error_nodes() { ) .unwrap(); - let source = "function a(b,, c, d :e:) {}"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), - &[( - 0, - vec![("the-error", ":e:"), ("the-error-identifier", "e"),] - ),] + assert_query_matches( + language, + &query, + "function a(b,, c, d :e:) {}", + &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], ); }); } @@ -410,8 +650,8 @@ fn test_query_matches_with_named_wildcard() { let query = Query::new( language, " - (return_statement (*) @the-return-value) - (binary_expression operator: * @the-operator) + (return_statement (_) @the-return-value) + (binary_expression operator: _ @the-operator) ", ) .unwrap(); @@ -439,14 +679,10 @@ fn test_query_matches_with_named_wildcard() { fn test_query_matches_with_wildcard_at_the_root() { allocations::record(|| { let language = get_language("javascript"); - let mut cursor = QueryCursor::new(); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let query = Query::new( language, " - (* + (_ (comment) @doc . 
(function_declaration @@ -455,37 +691,33 @@ fn test_query_matches_with_wildcard_at_the_root() { ) .unwrap(); - let source = "/* one */ var x; /* two */ function y() {} /* three */ class Z {}"; - - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, source), - &[(0, vec![("doc", "/* two */"), ("name", "y")]),] + assert_query_matches( + language, + &query, + "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", + &[(0, vec![("doc", "/* two */"), ("name", "y")])], ); let query = Query::new( language, " - (* (string) @a) - (* (number) @b) - (* (true) @c) - (* (false) @d) + (_ (string) @a) + (_ (number) @b) + (_ (true) @c) + (_ (false) @d) ", ) .unwrap(); - let source = "['hi', x(true), {y: false}]"; - - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "['hi', x(true), {y: false}]", &[ (0, vec![("a", "'hi'")]), (2, vec![("c", "true")]), (3, vec![("d", "false")]), - ] + ], ); }); } @@ -514,29 +746,57 @@ fn test_query_matches_with_immediate_siblings() { .) (list . - (*) @first-element) + (_) @first-element) ", ) .unwrap(); - let source = "import a.b.c.d; return [w, [1, y], z]"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "import a.b.c.d; return [w, [1, y], z]", &[ (0, vec![("parent", "a"), ("child", "b")]), (0, vec![("parent", "b"), ("child", "c")]), - (1, vec![("last-child", "d")]), (0, vec![("parent", "c"), ("child", "d")]), + (1, vec![("last-child", "d")]), (2, vec![("first-element", "w")]), (2, vec![("first-element", "1")]), - ] + ], + ); + + let query = Query::new( + language, + " + (block . (_) @first-stmt) + (block (_) @stmt) + (block (_) @last-stmt .) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + if a: + b() + c() + if d(): e(); f() + g() + ", + &[ + (0, vec![("first-stmt", "b()")]), + (1, vec![("stmt", "b()")]), + (1, vec![("stmt", "c()")]), + (1, vec![("stmt", "if d(): e(); f()")]), + (0, vec![("first-stmt", "e()")]), + (1, vec![("stmt", "e()")]), + (1, vec![("stmt", "f()")]), + (2, vec![("last-stmt", "f()")]), + (1, vec![("stmt", "g()")]), + (2, vec![("last-stmt", "g()")]), + ], ); }); } @@ -549,22 +809,27 @@ fn test_query_matches_with_repeated_leaf_nodes() { let query = Query::new( language, " - (* + ( (comment)+ @doc . (class_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) - (* + ( (comment)+ @doc . 
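Two query-syntax changes run through this hunk: the wildcard is now spelled "_" rather than "*" (so "(_)" matches any named node), and "." is the anchor operator, pinning a sibling pattern to the first named child when written before it and to the last when written after. A small hypothetical use:

    // `.` before the wildcard anchors it to the array's first named child;
    // placing `.` after a pattern would anchor it to the last child instead.
    let query = Query::new(language, "(array . (_) @first-element)").unwrap();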
(function_declaration - name: (identifier) @name)) + name: (identifier) @name) + ) ", ) .unwrap(); - let source = " + assert_query_matches( + language, + &query, + " // one // two a(); @@ -582,16 +847,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { // eight function d() {} } - "; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + ", &[ ( 0, @@ -599,47 +855,486 @@ fn test_query_matches_with_repeated_leaf_nodes() { ("doc", "// four"), ("doc", "// five"), ("doc", "// six"), - ("name", "B") - ] + ("name", "B"), + ], ), (1, vec![("doc", "// eight"), ("name", "d")]), - ] + ], ); }); } #[test] -fn test_query_matches_with_repeated_internal_nodes() { +fn test_query_matches_with_optional_nodes_inside_of_repetitions() { allocations::record(|| { let language = get_language("javascript"); - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let mut cursor = QueryCursor::new(); + let query = Query::new(language, r#"(array (","? (number) @num)+)"#).unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = [1, 2, 3, 4] + "#, + &[( + 0, + vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], + )], + ); + }); +} + +#[test] +fn test_query_matches_with_top_level_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (comment)+ @doc + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + // a + // b + // c + + d() + + // e + "#, + &[ + (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), + (0, vec![("doc", "// e")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_non_terminal_repetitions_within_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (_ + (expression_statement + (identifier) @id)+) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + a; + b; + c; + "#, + &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (variable_declaration + (","? (variable_declarator name: (identifier) @x))+)+ + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#" + var a = b, c, d + var e, f + + // more + var g + "#, + &[ + ( + 0, + vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")], + ), + (0, vec![("x", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { + allocations::record(|| { + let language = get_language("javascript"); + + // When this query sees a comment, it must keep track of several potential + // matches: up to two for each pattern that begins with a comment. 
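The comment above is the heart of this test: a repetition at the start of a pattern forces the cursor to hold several partial matches at once. For reference, here is a minimal standalone sketch of running such a repetition query through the Rust binding. It is illustrative only, not part of this patch: it assumes a `tree_sitter_javascript` grammar function linked into the build, and the callback-style `QueryCursor::matches` signature that this test file wraps in its `to_callback` helper.

```rust
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};

// Assumption: a JavaScript grammar compiled and linked in, as in the fixtures.
extern "C" {
    fn tree_sitter_javascript() -> Language;
}

fn main() {
    let language = unsafe { tree_sitter_javascript() };
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();

    let source = "// one\n// two\nfunction f() {}\n";
    let text = source.as_bytes();
    let tree = parser.parse(source, None).unwrap();

    // A pattern that begins with a `+` repetition: the consecutive comments
    // are folded into a single match, so while scanning siblings the cursor
    // must buffer in-progress matches until it knows where the run ends.
    let query = Query::new(language, "((comment)+ @doc (function_declaration))").unwrap();
    let mut cursor = QueryCursor::new();

    for m in cursor.matches(&query, tree.root_node(), |node: Node| &text[node.byte_range()]) {
        for capture in m.captures {
            println!(
                "pattern {} captured @{}: {:?}",
                m.pattern_index,
                query.capture_names()[capture.index as usize],
                capture.node.utf8_text(text).unwrap(),
            );
        }
    }
}
```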
+ let query = Query::new( + language, + r#" + (call_expression + function: (member_expression + property: (property_identifier) @name)) @ref.method + + ((comment)* @doc (function_declaration)) + ((comment)* @doc (generator_function_declaration)) + ((comment)* @doc (class_declaration)) + ((comment)* @doc (lexical_declaration)) + ((comment)* @doc (variable_declaration)) + ((comment)* @doc (method_definition)) + + (comment) @comment + "#, + ) + .unwrap(); + + // Here, a series of comments occurs in the middle of a match of the first + // pattern. To avoid exceeding the storage limits and discarding that outer + // match, the comment-related matches need to be managed efficiently. + let source = format!( + "theObject\n{}\n.theMethod()", + " // the comment\n".repeat(64) + ); + + assert_query_matches( + language, + &query, + &source, + &vec![(7, vec![("comment", "// the comment")]); 64] + .into_iter() + .chain(vec![( + 0, + vec![("ref.method", source.as_str()), ("name", "theMethod")], + )]) + .collect::>(), + ); + }); +} + +#[test] +fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + ( + (comment)* @doc + . + (function_declaration + name: (identifier) @name) + ) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a() { + // one + var b; + + function c() {} + + // two + // three + var d; + + // four + // five + function e() { + + } + } + + // six + ", + &[ + (0, vec![("name", "a")]), + (0, vec![("name", "c")]), + ( + 0, + vec![("doc", "// four"), ("doc", "// five"), ("name", "e")], + ), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_trailing_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + let query = Query::new( + language, + " + (class_declaration + name: (identifier) @class + (class_heritage + (identifier) @superclass)?) + ", + ) + .unwrap(); + + assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]); + + assert_query_matches( + language, + &query, + " + class A {} + class B extends C {} + class D extends (E.F) {} + ", + &[ + (0, vec![("class", "A")]), + (0, vec![("class", "B"), ("superclass", "C")]), + (0, vec![("class", "D")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_nested_optional_nodes() { + allocations::record(|| { + let language = get_language("javascript"); + + // A function call, optionally containing a function call, which optionally contains a number + let query = Query::new( + language, + " + (call_expression + function: (identifier) @outer-fn + arguments: (arguments + (call_expression + function: (identifier) @inner-fn + arguments: (arguments + (number)? 
@num))?)) + ", + ) + .unwrap(); + assert_query_matches( + language, + &query, + r#" + a(b, c(), d(null, 1, 2)) + e() + f(g()) + "#, + &[ + (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), + (0, vec![("outer-fn", "c")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]), + (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]), + (0, vec![("outer-fn", "d")]), + (0, vec![("outer-fn", "e")]), + (0, vec![("outer-fn", "f"), ("inner-fn", "g")]), + (0, vec![("outer-fn", "g")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_internal_nodes() { + allocations::record(|| { + let language = get_language("javascript"); let query = Query::new( language, " - (* + (_ (method_definition (decorator (identifier) @deco)+ name: (property_identifier) @name)) ", ) .unwrap(); - let source = " + + assert_query_matches( + language, + &query, + " class A { @c @d e() {} } - "; - let tree = parser.parse(source, None).unwrap(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - assert_eq!( - collect_matches(matches, &query, source), - &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")]),] + ", + &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], + ); + }) +} + +#[test] +fn test_query_matches_with_simple_alternatives() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + " + (pair + key: [(property_identifier) (string)] @key + value: [(function) @val1 (arrow_function) @val2]) + ", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = { + b: c, + 'd': e => f, + g: { + h: function i() {}, + 'x': null, + j: _ => k + }, + 'l': function m() {}, + }; + ", + &[ + (0, vec![("key", "'d'"), ("val2", "e => f")]), + (0, vec![("key", "h"), ("val1", "function i() {}")]), + (0, vec![("key", "j"), ("val2", "_ => k")]), + (0, vec![("key", "'l'"), ("val1", "function m() {}")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_in_repetitions() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + [(identifier) (string)] @el + . + ( + "," + . 
+ [(identifier) (string)] @el + )*) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = [b, 'c', d, 1, e, 'f', 'g', h]; + ", + &[ + (0, vec![("el", "b"), ("el", "'c'"), ("el", "d")]), + ( + 0, + vec![("el", "e"), ("el", "'f'"), ("el", "'g'"), ("el", "h")], + ), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_at_root() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + [ + "if" + "else" + "function" + "throw" + "return" + ] @keyword + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + function a(b, c, d) { + if (b) { + return c; + } else { + throw d; + } + } + ", + &[ + (0, vec![("keyword", "function")]), + (0, vec![("keyword", "if")]), + (0, vec![("keyword", "return")]), + (0, vec![("keyword", "else")]), + (0, vec![("keyword", "throw")]), + ], + ); + }) +} + +#[test] +fn test_query_matches_with_alternatives_under_fields() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (assignment_expression + left: [ + (identifier) @variable + (member_expression property: (property_identifier) @variable) + ]) + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + a = b; + b = c.d; + e.f = g; + h.i = j.k; + ", + &[ + (0, vec![("variable", "a")]), + (0, vec![("variable", "b")]), + (0, vec![("variable", "f")]), + (0, vec![("variable", "i")]), + ], ); - }) + }); } #[test] @@ -651,20 +1346,16 @@ fn test_query_matches_in_language_with_simple_aliases() { // tag names, script tag names, and style tag names. All of // these tokens are aliased to `tag_name`. let query = Query::new(language, "(tag_name) @tag").unwrap(); - let source = " + + assert_query_matches( + language, + &query, + "
            <div>
                <script>hi</script>
                <style>hi</style>
            </div>
-
"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); - - assert_eq!( - collect_matches(matches, &query, source), + + ", &[ (0, vec![("tag", "div")]), (0, vec![("tag", "script")]), @@ -680,6 +1371,8 @@ fn test_query_matches_in_language_with_simple_aliases() { #[test] fn test_query_matches_with_different_tokens_with_the_same_string_value() { allocations::record(|| { + // In Rust, there are two '<' tokens: one for the binary operator, + // and one with higher precedence for generics. let language = get_language("rust"); let query = Query::new( language, @@ -690,24 +1383,16 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { ) .unwrap(); - // In Rust, there are two '<' tokens: one for the binary operator, - // and one with higher precedence for generics. - let source = "const A: B = d < e || f > g;"; - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); - - assert_eq!( - collect_matches(matches, &query, source), + assert_query_matches( + language, + &query, + "const A: B = d < e || f > g;", &[ (0, vec![("less", "<")]), (1, vec![("greater", ">")]), (0, vec![("less", "<")]), (1, vec![("greater", ">")]), - ] + ], ); }); } @@ -745,32 +1430,108 @@ fn test_query_matches_with_too_many_permutations_to_track() { } #[test] -fn test_query_matches_with_anonymous_tokens() { +fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( language, - r#" - ";" @punctuation - "&&" @operator - "#, + " + ( + (comment) @doc + ; not immediate + (class_declaration) @class + ) + + (call_expression + function: [ + (identifier) @function + (member_expression property: (property_identifier) @method) + ]) + ", ) .unwrap(); - let source = "foo(a && b);"; + let source = "/* hi */ a.b(); ".repeat(50); let mut parser = Parser::new(); parser.set_language(language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + let matches = cursor.matches(&query, tree.root_node(), to_callback(&source)); assert_eq!( - collect_matches(matches, &query, source), + collect_matches(matches, &query, source.as_str()), + vec![(1, vec![("method", "b")]); 50], + ); + }); +} + +#[test] +fn test_query_matches_with_anonymous_tokens() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + ";" @punctuation + "&&" @operator + "\"" @quote + "#, + ) + .unwrap(); + + assert_query_matches( + language, + &query, + r#"foo(a && "b");"#, &[ (1, vec![("operator", "&&")]), + (2, vec![("quote", "\"")]), + (2, vec![("quote", "\"")]), (0, vec![("punctuation", ";")]), - ] + ], + ); + }); +} + +#[test] +fn test_query_matches_with_supertypes() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + r#" + (argument_list (expression) @arg) + + (keyword_argument + value: (expression) @kw_arg) + + (assignment + left: (identifier) @var_def) + + (primary_expression/identifier) @var_ref + "#, + ) + .unwrap(); + + 
assert_query_matches( + language, + &query, + " + a = b.c( + [d], + # a comment + e=f + ) + ", + &[ + (2, vec![("var_def", "a")]), + (3, vec![("var_ref", "b")]), + (0, vec![("arg", "[d]")]), + (3, vec![("var_ref", "d")]), + (1, vec![("kw_arg", "f")]), + (3, vec![("var_ref", "f")]), + ], ); }); } @@ -804,6 +1565,45 @@ fn test_query_matches_within_byte_range() { }); } +#[test] +fn test_query_captures_within_byte_range() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + " + (call_expression + function: (identifier) @function + arguments: (argument_list (string_literal) @string.arg)) + + (string_literal) @string + ", + ) + .unwrap(); + + let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(3, 27) + .captures(&query, tree.root_node(), to_callback(source)); + + assert_eq!( + collect_captures(captures, &query, source), + &[ + ("function", "DEFUN"), + ("string.arg", "\"safe-length\""), + ("string", "\"safe-length\""), + ] + ); + }); +} + #[test] fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { @@ -918,6 +1718,150 @@ fn test_query_matches_with_multiple_captures_on_a_node() { }); } +#[test] +fn test_query_matches_with_captured_wildcard_at_root() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + language, + " + ; captured wildcard at the root + (_ [ + (except_clause (block) @block) + (finally_clause (block) @block) + ]) @stmt + + [ + (while_statement (block) @block) + (if_statement (block) @block) + + ; captured wildcard at the root within an alternation + (_ [ + (else_clause (block) @block) + (elif_clause (block) @block) + ]) + + (try_statement (block) @block) + (for_statement (block) @block) + ] @stmt + ", + ) + .unwrap(); + + let source = " + for i in j: + while True: + if a: + print b + elif c: + print d + else: + try: + print f + except: + print g + finally: + print h + else: + print i + " + .trim(); + + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + + let match_capture_names_and_rows = cursor + .matches(&query, tree.root_node(), to_callback(source)) + .map(|m| { + m.captures + .iter() + .map(|c| { + ( + query.capture_names()[c.index as usize].as_str(), + c.node.kind(), + c.node.start_position().row, + ) + }) + .collect::>() + }) + .collect::>(); + + assert_eq!( + match_capture_names_and_rows, + &[ + vec![("stmt", "for_statement", 0), ("block", "block", 1)], + vec![("stmt", "while_statement", 1), ("block", "block", 2)], + vec![("stmt", "if_statement", 2), ("block", "block", 3)], + vec![("stmt", "if_statement", 2), ("block", "block", 5)], + vec![("stmt", "if_statement", 2), ("block", "block", 7)], + vec![("stmt", "try_statement", 7), ("block", "block", 8)], + vec![("stmt", "try_statement", 7), ("block", "block", 10)], + vec![("stmt", "try_statement", 7), ("block", "block", 12)], + vec![("stmt", "while_statement", 1), ("block", "block", 14)], + ] + ) + }); +} + +#[test] +fn test_query_matches_with_no_captures() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (identifier) + (string) @s + "#, + ) + .unwrap(); + + assert_query_matches( + 
language, + &query, + " + a = 'hi'; + b = 'bye'; + ", + &[ + (0, vec![]), + (1, vec![("s", "'hi'")]), + (0, vec![]), + (1, vec![("s", "'bye'")]), + ], + ); + }); +} + +#[test] +fn test_query_matches_with_repeated_fields() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + "(field_declaration declarator: (field_identifier) @field)", + ) + .unwrap(); + + assert_query_matches( + language, + &query, + " + struct S { + int a, b, c; + } + ", + &[ + (0, vec![("field", "a")]), + (0, vec![("field", "b")]), + (0, vec![("field", "c")]), + ], + ); + }); +} + #[test] fn test_query_captures_basic() { allocations::record(|| { @@ -926,12 +1870,12 @@ fn test_query_captures_basic() { language, r#" (pair - key: * @method.def + key: _ @method.def (function name: (identifier) @method.alias)) (variable_declarator - name: * @function.def + name: _ @function.def value: (function name: (identifier) @function.alias)) @@ -1001,20 +1945,25 @@ fn test_query_captures_with_text_conditions() { language, r#" ((identifier) @constant - (match? @constant "^[A-Z]{2,}$")) + (#match? @constant "^[A-Z]{2,}$")) ((identifier) @constructor - (match? @constructor "^[A-Z]")) + (#match? @constructor "^[A-Z]")) ((identifier) @function.builtin - (eq? @function.builtin "require")) + (#eq? @function.builtin "require")) - (identifier) @variable + ((identifier) @variable + (#not-match? @variable "^(lambda|load)$")) "#, ) .unwrap(); let source = " + toad + load + panda + lambda const ab = require('./ab'); new Cd(EF); "; @@ -1028,6 +1977,8 @@ fn test_query_captures_with_text_conditions() { assert_eq!( collect_captures(captures, &query, source), &[ + ("variable", "toad"), + ("variable", "panda"), ("variable", "ab"), ("function.builtin", "require"), ("variable", "require"), @@ -1050,13 +2001,13 @@ fn test_query_captures_with_predicates() { language, r#" ((call_expression (identifier) @foo) - (set! name something) - (set! cool) - (something! @foo omg)) + (#set! name something) + (#set! cool) + (#something! @foo omg)) ((property_identifier) @bar - (is? cool) - (is-not? name something))"#, + (#is? cool) + (#is-not? name something))"#, ) .unwrap(); @@ -1102,13 +2053,13 @@ fn test_query_captures_with_quoted_predicate_args() { language, r#" ((call_expression (identifier) @foo) - (set! one "\"something\ngreat\"")) + (#set! one "\"something\ngreat\"")) ((identifier) - (set! two "\\s(\r?\n)*$")) + (#set! two "\\s(\r?\n)*$")) ((function_declaration) - (set! three "\"something\ngreat\"")) + (#set! three "\"something\ngreat\"")) "#, ) .unwrap(); @@ -1179,7 +2130,7 @@ fn test_query_captures_with_many_nested_results_without_fields() { language, r#" (pair - key: * @method-def + key: _ @method-def (arrow_function)) ":" @colon @@ -1246,7 +2197,7 @@ fn test_query_captures_with_many_nested_results_with_fields() { consequence: (member_expression object: (identifier) @right) alternative: (null)) - (eq? @left @right)) + (#eq? @left @right)) "#, ) .unwrap(); @@ -1390,6 +2341,54 @@ fn test_query_captures_with_too_many_nested_results() { }); } +#[test] +fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + language, + r#" + (array + "[" @l-bracket + "]" @r-bracket) + + "." @dot + "#, + ) + .unwrap(); + + // The '[' node must be returned before all of the '.' nodes, + // even though its pattern does not finish until the ']' node + // at the end of the document. 
But because the '[' is definite, + // it can be returned before the pattern finishes matching. + let source = " + [ + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + a.b.c.d.e.f.g.h.i, + ] + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let captures = cursor.captures(&query, tree.root_node(), to_callback(source)); + assert_eq!( + collect_captures(captures, &query, source), + [("l-bracket", "[")] + .iter() + .chain([("dot", "."); 40].iter()) + .chain([("r-bracket", "]")].iter()) + .cloned() + .collect::>(), + ); + }); +} + #[test] fn test_query_captures_ordered_by_both_start_and_end_positions() { allocations::record(|| { @@ -1439,7 +2438,7 @@ fn test_query_captures_with_matches_removed() { r#" (binary_expression left: (identifier) @left - operator: * @op + operator: _ @op right: (identifier) @right) "#, ) @@ -1532,9 +2531,9 @@ fn test_query_start_byte_for_pattern() { .trim_start(); let patterns_3 = " - ((identifier) @b (match? @b i)) + ((identifier) @b (#match? @b i)) (function_declaration name: (identifier) @c) - (method_definition name: (identifier) @d) + (method_definition name: (property_identifier) @d) " .trim_start(); @@ -1561,14 +2560,14 @@ fn test_query_capture_names() { language, r#" (if_statement - condition: (binary_expression - left: * @left-operand + condition: (parenthesized_expression (binary_expression + left: _ @left-operand operator: "||" - right: * @right-operand) + right: _ @right-operand)) consequence: (statement_block) @body) (while_statement - condition:* @loop-condition) + condition: _ @loop-condition) "#, ) .unwrap(); @@ -1663,6 +2662,328 @@ fn test_query_disable_pattern() { }); } +#[test] +fn test_query_alternative_predicate_prefix() { + allocations::record(|| { + let language = get_language("c"); + let query = Query::new( + language, + r#" + ((call_expression + function: (identifier) @keyword + arguments: (argument_list + (string_literal) @function)) + (.eq? @keyword "DEFUN")) + "#, + ) + .unwrap(); + let source = r#" + DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, + doc: /* Return the argument unchanged. */ + attributes: const) + (Lisp_Object arg) + { + return arg; + } + "#; + assert_query_matches( + language, + &query, + source, + &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], + ); + }); +} + +#[test] +fn test_query_step_is_definite() { + struct Row { + language: Language, + description: &'static str, + pattern: &'static str, + results_by_substring: &'static [(&'static str, bool)], + } + + let rows = &[ + Row { + description: "no definite steps", + language: get_language("python"), + pattern: r#"(expression_statement (string))"#, + results_by_substring: &[("expression_statement", false), ("string", false)], + }, + Row { + description: "all definite steps", + language: get_language("javascript"), + pattern: r#"(object "{" "}")"#, + results_by_substring: &[("object", false), ("{", true), ("}", true)], + }, + Row { + description: "an indefinite step that is optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @foo "}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)?", false), + ("}", true), + ], + }, + Row { + description: "multiple indefinite steps that are optional", + language: get_language("javascript"), + pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? 
"}")"#, + results_by_substring: &[ + ("object", false), + ("{", true), + ("(identifier)? @id1", false), + ("\",\"", false), + ("}", true), + ], + }, + Row { + description: "definite step after indefinite step", + language: get_language("javascript"), + pattern: r#"(pair (property_identifier) ":")"#, + results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], + }, + Row { + description: "indefinite step in between two definite steps", + language: get_language("javascript"), + pattern: r#"(ternary_expression + condition: (_) + "?" + consequence: (call_expression) + ":" + alternative: (_))"#, + results_by_substring: &[ + ("condition:", false), + ("\"?\"", false), + ("consequence:", false), + ("\":\"", true), + ("alternative:", true), + ], + }, + Row { + description: "one definite step after a repetition", + language: get_language("javascript"), + pattern: r#"(object "{" (_) "}")"#, + results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)], + }, + Row { + description: "definite steps after multiple repetitions", + language: get_language("json"), + pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#, + results_by_substring: &[ + ("object", false), + ("{", false), + ("(pair) \",\" (pair)", false), + ("(pair) \",\" (_)", false), + ("\",\" (_)", false), + ("(_)", true), + ("}", true), + ], + }, + Row { + description: "a definite with a field", + language: get_language("javascript"), + pattern: r#"(binary_expression left: (identifier) right: (_))"#, + results_by_substring: &[ + ("binary_expression", false), + ("(identifier)", false), + ("(_)", true), + ], + }, + Row { + description: "multiple definite steps with fields", + language: get_language("javascript"), + pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", true), + ("statement_block", true), + ], + }, + Row { + description: "nesting, one definite step", + language: get_language("javascript"), + pattern: r#" + (function_declaration + name: (identifier) + body: (statement_block "{" (expression_statement) "}"))"#, + results_by_substring: &[ + ("function_declaration", false), + ("identifier", false), + ("statement_block", false), + ("{", false), + ("expression_statement", false), + ("}", true), + ], + }, + Row { + description: "definite step after some deeply nested hidden nodes", + language: get_language("ruby"), + pattern: r#" + (singleton_class + value: (constant) + "end") + "#, + results_by_substring: &[ + ("singleton_class", false), + ("constant", false), + ("end", true), + ], + }, + Row { + description: "nesting, no definite steps", + language: get_language("javascript"), + pattern: r#" + (call_expression + function: (member_expression + property: (property_identifier) @template-tag) + arguments: (template_string)) @template-call + "#, + results_by_substring: &[("property_identifier", false), ("template_string", false)], + }, + Row { + description: "a definite step after a nested node", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: (identifier) @obj + property: (property_identifier) @prop) + "[") + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", true), + ("[", true), + ], + }, + Row { + description: "a step that is indefinite due to a predicate", + language: get_language("javascript"), + pattern: r#" + (subscript_expression + object: (member_expression + object: 
(identifier) @obj + property: (property_identifier) @prop) + "[" + (#match? @prop "foo")) + "#, + results_by_substring: &[ + ("identifier", false), + ("property_identifier", false), + ("[", true), + ], + }, + Row { + description: "alternation where one branch has definite steps", + language: get_language("javascript"), + pattern: r#" + [ + (unary_expression (identifier)) + (call_expression + function: (_) + arguments: (_)) + (binary_expression right:(call_expression)) + ] + "#, + results_by_substring: &[ + ("identifier", false), + ("right:", false), + ("function:", true), + ("arguments:", true), + ], + }, + Row { + description: "aliased parent node", + language: get_language("ruby"), + pattern: r#" + (method_parameters "(" (identifier) @id")") + "#, + results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], + }, + Row { + description: "long, but not too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", true), + ], + }, + Row { + description: "too long to analyze", + language: get_language("javascript"), + pattern: r#" + (object "{" (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) "}") + "#, + results_by_substring: &[ + ("\"{\"", false), + ("(pair)", false), + ("(pair) \"}\"", false), + ("\"}\"", false), + ], + }, + Row { + description: "hidden nodes that have several fields", + language: get_language("java"), + pattern: r#" + (method_declaration name: (identifier)) + "#, + results_by_substring: &[("name:", true)], + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + for (substring, is_definite) in row.results_by_substring { + let offset = row.pattern.find(substring).unwrap(); + assert_eq!( + query.step_is_definite(offset), + *is_definite, + "Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + substring, + is_definite, + ) + } + } + }); +} + +fn assert_query_matches( + language: Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), to_callback(source)); + assert_eq!(collect_matches(matches, &query, source), expected); +} + fn collect_matches<'a>( matches: impl Iterator>, query: &'a Query, diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 41907a3c55..628c0bf651 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -1,61 +1,82 @@ use super::helpers::allocations; use super::helpers::fixtures::{get_language, get_language_queries_path}; +use std::ffi::CStr; use std::ffi::CString; use std::{fs, ptr, slice, str}; +use tree_sitter::Point; use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, TagKind, TagsConfiguration, TagsContext}; +use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" -((function_definition - 
name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @function - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) +( + (function_definition + name: (identifier) @name + body: (block . (expression_statement (string) @doc))) @definition.function + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (function_definition - name: (identifier) @name) @function -((class_definition - name: (identifier) @name - body: (block . (expression_statement (string) @doc))) @class - (strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")) + name: (identifier) @name) @definition.function + +( + (class_definition + name: (identifier) @name + body: (block + . (expression_statement (string) @doc))) @definition.class + (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") +) + (class_definition - name: (identifier) @name) @class + name: (identifier) @name) @definition.class + (call - function: (identifier) @name) @call + function: (identifier) @name) @reference.call + +(call + function: (attribute + attribute: (identifier) @name)) @reference.call "#; const JS_TAG_QUERY: &'static str = r#" -((* - (comment)+ @doc . +( + (comment)* @doc . (class_declaration - name: (identifier) @name) @class) - (select-adjacent! @doc @class) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @definition.class + (#select-adjacent! @doc @definition.class) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* - (comment)+ @doc . +( + (comment)* @doc . (method_definition - name: (property_identifier) @name) @method) - (select-adjacent! @doc @method) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (property_identifier) @name) @definition.method + (#select-adjacent! @doc @definition.method) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -((* - (comment)+ @doc . +( + (comment)* @doc . (function_declaration - name: (identifier) @name) @function) - (select-adjacent! @doc @function) - (strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")) + name: (identifier) @name) @definition.function + (#select-adjacent! @doc @definition.function) + (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") +) -(call_expression function: (identifier) @name) @call - "#; +(call_expression + function: (identifier) @name) @reference.call +"#; const RUBY_TAG_QUERY: &'static str = r#" (method - name: (identifier) @name) @method + name: (_) @name) @definition.method -(method_call - method: (identifier) @name) @call +(call + method: (identifier) @name) @reference.call + +(setter (identifier) @ignore) -((identifier) @name @call - (is-not? local)) +((identifier) @name @reference.call + (#is-not? 
local)) "#; #[test] @@ -81,25 +102,26 @@ fn test_tags_python() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) .collect::>(), &[ - ("Customer", TagKind::Class), - ("age", TagKind::Function), - ("compute_age", TagKind::Call), + ("Customer", "class"), + ("age", "function"), + ("compute_age", "call"), ] ); - assert_eq!(substr(source, &tags[0].line_range), " class Customer:"); - assert_eq!( - substr(source, &tags[1].line_range), - " def age(self):" - ); + assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); + assert_eq!(substr(source, &tags[1].line_range), "def age(self):"); assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); } @@ -132,17 +154,22 @@ fn test_tags_javascript() { let tags = tag_context .generate_tags(&tags_config, source, None) .unwrap() + .0 .collect::, _>>() .unwrap(); assert_eq!( tags.iter() - .map(|t| (substr(source, &t.name_range), t.kind)) + .map(|t| ( + substr(source, &t.name_range), + t.span.clone(), + tags_config.syntax_type_name(t.syntax_type_id) + )) .collect::>(), &[ - ("Customer", TagKind::Class), - ("getAge", TagKind::Method), - ("Agent", TagKind::Class) + ("Customer", Point::new(5, 10)..Point::new(5, 18), "class",), + ("getAge", Point::new(9, 8)..Point::new(9, 14), "method",), + ("Agent", Point::new(15, 10)..Point::new(15, 15), "class",) ] ); assert_eq!( @@ -153,6 +180,27 @@ fn test_tags_javascript() { assert_eq!(tags[2].docs, None); } +#[test] +fn test_tags_columns_measured_in_utf16_code_units() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = r#""ā¤ļøā¤ļøā¤ļø".hello_Ī±_Ļ‰()"#.as_bytes(); + + let tag = tag_context + .generate_tags(&tags_config, source, None) + .unwrap() + .0 + .next() + .unwrap() + .unwrap(); + + assert_eq!(substr(source, &tag.name_range), "hello_Ī±_Ļ‰"); + assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); + assert_eq!(tag.utf16_column_range, 9..18); +} + #[test] fn test_tags_ruby() { let language = get_language("ruby"); @@ -164,7 +212,7 @@ fn test_tags_ruby() { " b = 1 - def foo() + def foo=() c = 1 # a is a method because it is not in scope @@ -184,6 +232,7 @@ fn test_tags_ruby() { let tags = tag_context .generate_tags(&tags_config, source.as_bytes(), None) .unwrap() + .0 .collect::, _>>() .unwrap(); @@ -191,18 +240,18 @@ fn test_tags_ruby() { tags.iter() .map(|t| ( substr(source.as_bytes(), &t.name_range), - t.kind, + tags_config.syntax_type_name(t.syntax_type_id), (t.span.start.row, t.span.start.column), )) .collect::>(), &[ - ("foo", TagKind::Method, (2, 0)), - ("bar", TagKind::Call, (7, 4)), - ("a", TagKind::Call, (7, 8)), - ("b", TagKind::Call, (7, 11)), - ("each", TagKind::Call, (9, 14)), - ("baz", TagKind::Call, (13, 8)), - ("b", TagKind::Call, (13, 15),), + ("foo=", "method", (2, 4)), + ("bar", "call", (7, 4)), + ("a", "call", (7, 8)), + ("b", "call", (7, 11)), + ("each", "call", (9, 14)), + ("baz", "call", (13, 8)), + ("b", "call", (13, 15),), ] ); } @@ -226,7 +275,7 @@ fn test_tags_cancellation() { .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) .unwrap(); - for (i, tag) in tags.enumerate() { + for (i, tag) in 
tags.0.enumerate() { if i == 150 { cancellation_flag.store(1, Ordering::SeqCst); } @@ -240,6 +289,45 @@ fn test_tags_cancellation() { }); } +#[test] +fn test_invalid_capture() { + let language = get_language("python"); + let e = TagsConfiguration::new(language, "(identifier) @method", "") + .expect_err("expected InvalidCapture error"); + assert_eq!(e, Error::InvalidCapture("method".to_string())); +} + +#[test] +fn test_tags_with_parse_error() { + let language = get_language("python"); + let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); + let mut tag_context = TagsContext::new(); + + let source = br#" + class Fine: pass + class Bad + "#; + + let (tags, failed) = tag_context + .generate_tags(&tags_config, source, None) + .unwrap(); + + let newtags = tags.collect::, _>>().unwrap(); + + assert!(failed, "syntax error should have been detected"); + + assert_eq!( + newtags + .iter() + .map(|t| ( + substr(source, &t.name_range), + tags_config.syntax_type_name(t.syntax_type_id) + )) + .collect::>(), + &[("Fine", "class"),] + ); +} + #[test] fn test_tags_via_c_api() { allocations::record(|| { @@ -303,29 +391,29 @@ fn test_tags_via_c_api() { }) .unwrap(); + let syntax_types: Vec<&str> = unsafe { + let mut len: u32 = 0; + let ptr = + c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); + slice::from_raw_parts(ptr, len as usize) + .iter() + .map(|i| CStr::from_ptr(*i).to_str().unwrap()) + .collect() + }; + assert_eq!( tags.iter() .map(|tag| ( - tag.kind, + syntax_types[tag.syntax_type_id as usize], &source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], )) .collect::>(), &[ - ( - c::TSTagKind::Function, - "b", - "function b() {", - "one\ntwo\nthree" - ), - ( - c::TSTagKind::Class, - "C", - "class C extends D {", - "four\nfive" - ), - (c::TSTagKind::Call, "b", "b(a);", "") + ("function", "b", "function b() {", "one\ntwo\nthree"), + ("class", "C", "class C extends D {", "four\nfive"), + ("call", "b", "b(a);", "") ] ); diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs index 6a857dd94d..1a658281b0 100644 --- a/cli/src/tests/test_highlight_test.rs +++ b/cli/src/tests/test_highlight_test.rs @@ -1,5 +1,6 @@ use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; -use crate::test_highlight::{get_highlight_positions, parse_highlight_test}; +use crate::query_testing::{parse_position_comments, Assertion}; +use crate::test_highlight::get_highlight_positions; use tree_sitter::{Parser, Point}; use tree_sitter_highlight::{Highlight, Highlighter}; @@ -25,13 +26,23 @@ fn test_highlight_test_with_basic_test() { ] .join("\n"); - let assertions = parse_highlight_test(&mut Parser::new(), language, source.as_bytes()).unwrap(); + let assertions = + parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); assert_eq!( assertions, &[ - (Point::new(0, 5), "function".to_string()), - (Point::new(0, 11), "keyword".to_string()), - (Point::new(3, 9), "variable.parameter".to_string()), + Assertion { + position: Point::new(0, 5), + expected_capture_name: "function".to_string() + }, + Assertion { + position: Point::new(0, 11), + expected_capture_name: "keyword".to_string() + }, + Assertion { + position: Point::new(3, 9), + expected_capture_name: "variable.parameter".to_string() + }, ] ); diff --git a/cli/src/util.rs b/cli/src/util.rs 
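The tags tests above exercise the reworked `generate_tags` API: the iterator now comes paired with a flag reporting whether the file had parse errors, and tag kinds are looked up via `syntax_type_name` instead of a fixed `TagKind` enum. Below is a hedged sketch of that API as these tests use it; the `tree_sitter_python` grammar function is an assumption, while the other calls mirror the diff above.

```rust
use tree_sitter::Language;
use tree_sitter_tags::{TagsConfiguration, TagsContext};

// Assumption: a Python grammar compiled and linked into the build.
extern "C" {
    fn tree_sitter_python() -> Language;
}

fn main() {
    let language = unsafe { tree_sitter_python() };
    // A one-pattern tags query in the style of PYTHON_TAG_QUERY above;
    // the empty string is the locals query.
    let config = TagsConfiguration::new(
        language,
        "(class_definition name: (identifier) @name) @definition.class",
        "",
    )
    .unwrap();
    let mut context = TagsContext::new();

    let source = b"class Customer: pass";
    // generate_tags now yields a (tags, failed) pair; `failed` reports
    // whether the parse contained syntax errors.
    let (tags, failed) = context.generate_tags(&config, source, None).unwrap();
    for tag in tags {
        let tag = tag.unwrap();
        println!(
            "{}: {}",
            config.syntax_type_name(tag.syntax_type_id),
            std::str::from_utf8(&source[tag.name_range.clone()]).unwrap(),
        );
    }
    assert!(!failed);
}
```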
index 8978ecc16c..acafa6621c 100644 --- a/cli/src/util.rs +++ b/cli/src/util.rs @@ -1,3 +1,4 @@ +use super::error::{Error, Result}; use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -14,14 +15,16 @@ const HTML_HEADER: &[u8] = b"\n Arc { let result = Arc::new(AtomicUsize::new(0)); - thread::spawn({ - let flag = result.clone(); - move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - flag.store(1, Ordering::Relaxed); - } - }); + if atty::is(atty::Stream::Stdin) { + thread::spawn({ + let flag = result.clone(); + move || { + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + flag.store(1, Ordering::Relaxed); + } + }); + } result } #[cfg(windows)] @@ -31,12 +34,12 @@ pub struct LogSession(); pub struct LogSession(PathBuf, Option, Option); #[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result { +pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { Ok(LogSession()) } #[cfg(unix)] -pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result { +pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; @@ -46,11 +49,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result Resu } // Run `emcc` in a container using the `emscripten-slim` image - command.args(&["trzeci/emscripten-slim", "emcc"]); + command.args(&["emscripten/emsdk", "emcc"]); } else { - return Error::err("You must have either emcc or docker on your PATH to run this command".to_string()); + return Error::err( + "You must have either emcc or docker on your PATH to run this command".to_string(), + ); } command.args(&[ @@ -81,31 +83,22 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu "src", ]); - // Find source files to pass to emscripten - let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| { - format!("Failed to read source directory {:?}", src_dir) - }))?; - - for entry in src_entries { - let entry = entry?; - let file_name = entry.file_name(); - - // Do not compile the node.js binding file. - if file_name - .to_str() - .map_or(false, |s| s.starts_with("binding")) - { - continue; - } + let src = Path::new("src"); + let parser_c_path = src.join("parser.c"); + let scanner_c_path = src.join("scanner.c"); + let scanner_cc_path = src.join("scanner.cc"); + let scanner_cpp_path = src.join("scanner.cpp"); - // Compile any .c, .cc, or .cpp files - if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) { - if extension == "c" || extension == "cc" || extension == "cpp" { - command.arg(Path::new("src").join(entry.file_name())); - } - } + if language_dir.join(&scanner_cc_path).exists() { + command.arg("-xc++").arg(&scanner_cc_path); + } else if language_dir.join(&scanner_cpp_path).exists() { + command.arg("-xc++").arg(&scanner_cpp_path); + } else if language_dir.join(&scanner_c_path).exists() { + command.arg(&scanner_c_path); } + command.arg(&parser_c_path); + let output = command .output() .map_err(Error::wrap(|| "Failed to run emcc command"))?; diff --git a/cli/src/web_ui.rs b/cli/src/web_ui.rs index bfde94a145..9b29a73a0f 100644 --- a/cli/src/web_ui.rs +++ b/cli/src/web_ui.rs @@ -10,6 +10,16 @@ use webbrowser; macro_rules! 
resource { ($name: tt, $path: tt) => { + #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] + fn $name(tree_sitter_dir: &Option) -> Vec { + if let Some(tree_sitter_dir) = tree_sitter_dir { + fs::read(tree_sitter_dir.join($path)).unwrap() + } else { + include_bytes!(concat!("../../", $path)).to_vec() + } + } + + #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -20,9 +30,9 @@ macro_rules! resource { }; } -macro_rules! posix_resource { +macro_rules! optional_resource { ($name: tt, $path: tt) => { - #[cfg(unix)] + #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] fn $name(tree_sitter_dir: &Option) -> Vec { if let Some(tree_sitter_dir) = tree_sitter_dir { fs::read(tree_sitter_dir.join($path)).unwrap() @@ -31,19 +41,23 @@ macro_rules! posix_resource { } } - #[cfg(windows)] - fn $name(_: &Option) -> Vec { - Vec::new() + #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] + fn $name(tree_sitter_dir: &Option) -> Vec { + if let Some(tree_sitter_dir) = tree_sitter_dir { + fs::read(tree_sitter_dir.join($path)).unwrap() + } else { + Vec::new() + } } }; } resource!(get_main_html, "cli/src/web_ui.html"); resource!(get_playground_js, "docs/assets/js/playground.js"); -posix_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); -posix_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); +optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); +optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -pub fn serve(grammar_path: &Path) { +pub fn serve(grammar_path: &Path, open_in_browser: bool) { let port = get_available_port().expect("Couldn't find an available port"); let url = format!("127.0.0.1:{}", port); let server = Server::http(&url).expect("Failed to start web server"); @@ -59,12 +73,11 @@ pub fn serve(grammar_path: &Path) { ) })) .unwrap(); - - webbrowser::open(&format!("http://127.0.0.1:{}", port)) - .map_err(Error::wrap(|| { - format!("Failed to open '{}' in a web browser", url) - })) - .unwrap(); + if open_in_browser { + if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) { + eprintln!("Failed to open '{}' in a web browser", url); + } + } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); let main_html = String::from_utf8(get_main_html(&tree_sitter_dir)) diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js index 686be90d3d..137bb352a6 100644 --- a/docs/assets/js/playground.js +++ b/docs/assets/js/playground.js @@ -277,7 +277,7 @@ let tree; const startPosition = queryEditor.posFromIndex(error.index); const endPosition = { line: startPosition.line, - ch: startPosition.ch + (error.length || 1) + ch: startPosition.ch + (error.length || Infinity) }; if (error.index === queryText.length) { diff --git a/docs/index.md b/docs/index.md index 052e928090..1293ec4866 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,6 +34,7 @@ Parsers for these languages are fairly complete: * [Elm](https://github.com/razzeee/tree-sitter-elm) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) +- [Fennel](https://github.com/travonted/tree-sitter-fennel) * [Go](https://github.com/tree-sitter/tree-sitter-go) * [HTML](https://github.com/tree-sitter/tree-sitter-html) * [Java](https://github.com/tree-sitter/tree-sitter-java) @@ -45,12 +46,15 @@ Parsers for these languages are fairly complete: * 
[Python](https://github.com/tree-sitter/tree-sitter-python) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) +* [R](https://github.com/r-lib/tree-sitter-r) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) +* [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) +* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) Parsers for these languages are in development: @@ -59,6 +63,7 @@ Parsers for these languages are in development: * [Julia](https://github.com/tree-sitter/tree-sitter-julia) * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) +* [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) * [Swift](https://github.com/tree-sitter/tree-sitter-swift) ### Talks on Tree-sitter diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index 406e836444..75c508f589 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -15,28 +15,27 @@ All of the API functions shown here are declared and documented in the [`tree_si ### Building the Library -To build the library on a POSIX system, run this script, which will create a static library called `libtree-sitter.a` in the Tree-sitter folder: +To build the library on a POSIX system, just run `make` in the Tree-sitter directory. This will create a static library called `libtree-sitter.a` as well as dynamic libraries. -```sh -script/build-lib -``` - -Alternatively, you can use the library in a larger project by adding one source file to the project. This source file needs two directories to be in the include path when compiled: +Alternatively, you can incorporate the library in a larger project's build system by adding one source file to the build. This source file needs two directories to be in the include path when compiled: **source file:** -* `tree-sitter/lib/src/lib.c` + +- `tree-sitter/lib/src/lib.c` **include directories:** -* `tree-sitter/lib/src` -* `tree-sitter/lib/include` + +- `tree-sitter/lib/src` +- `tree-sitter/lib/include` ### The Basic Objects There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`. -* A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. -* A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. -* A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. -* A `TSNode` represents a single node in the syntax tree. 
It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. + +- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` is generated by Tree-sitter. Many languages are already available in separate git repositories within the the [Tree-sitter GitHub organization](https://github.com/tree-sitter). See [the next page](./creating-parsers) for how to create new languages. +- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some source code. +- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the source code changes. +- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as well as its relation to other nodes like its parent, siblings and children. ### An Example Program @@ -128,7 +127,7 @@ TSTree *ts_parser_parse_string( ); ``` -You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)). In this case, you can use the more general `ts_parser_parse` function: +You may want to parse source code that's stored in a custom data structure, like a [piece table](https://en.wikipedia.org/wiki/Piece_table) or a [rope](). In this case, you can use the more general `ts_parser_parse` function: ```c TSTree *ts_parser_parse( @@ -155,7 +154,7 @@ typedef struct { ### Syntax Nodes -Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's *type* is a string that indicates which grammar rule the node represents. +Tree-sitter provides a [DOM](https://en.wikipedia.org/wiki/Document_Object_Model)-style interface for inspecting syntax trees. A syntax node's _type_ is a string that indicates which grammar rule the node represents. ```c const char *ts_node_type(TSNode); @@ -178,7 +177,7 @@ TSPoint ts_node_end_point(TSNode); ### Retrieving Nodes -Every tree has a *root node*: +Every tree has a _root node_: ```c TSNode ts_tree_root_node(const TSTree *); @@ -199,7 +198,7 @@ TSNode ts_node_prev_sibling(TSNode); TSNode ts_node_parent(TSNode); ``` -These methods may all return a *null node* to indicate, for example, that a node does not *have* a next sibling. You can check if a node is null: +These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling. You can check if a node is null: ```c bool ts_node_is_null(TSNode); @@ -207,21 +206,15 @@ bool ts_node_is_null(TSNode); ### Named vs Anonymous Nodes -Tree-sitter produces [*concrete* syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [*abstract* syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. 
Tree-sitter's trees support these use cases by making a distinction between *named* and *anonymous* nodes. +Tree-sitter produces [_concrete_ syntax trees](https://en.wikipedia.org/wiki/Parse_tree) - trees that contain nodes for every individual token in the source code, including things like commas and parentheses. This is important for use-cases that deal with individual tokens, like [syntax highlighting](https://en.wikipedia.org/wiki/Syntax_highlighting). But some types of code analysis are easier to perform using an [_abstract_ syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) - a tree in which the less important details have been removed. Tree-sitter's trees support these use cases by making a distinction between _named_ and _anonymous_ nodes. Consider a grammar rule like this: ```js -if_statement: $ => seq( - 'if', - '(', - $._expression, - ')', - $._statement, -) +if_statement: ($) => seq("if", "(", $._expression, ")", $._statement); ``` -A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as *named* nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would *not* be named nodes, because they are represented in the grammar as simple strings. +A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they are represented in the grammar as simple strings. You can check whether any given node is named: @@ -242,7 +235,7 @@ If you use this group of methods, the syntax tree functions much like an abstrac ### Node Field Names -To make syntax nodes easier to analyze, many grammars assign unique *field names* to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name: +To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes. The next page [explains](./creating-parsers#using-fields) how to do this on your own grammars. If a syntax node has fields, you can access its children using their field name: ```c TSNode ts_node_child_by_field_name( @@ -270,7 +263,7 @@ TSNode ts_node_child_by_field_id(TSNode, TSFieldId); ### Editing -In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must *edit* the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. +In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts the ranges of its nodes so that they stay in sync with the code. ```c typedef struct { @@ -293,13 +286,13 @@ When you edit a syntax tree, the positions of its nodes will change. 
If you have void ts_node_edit(TSNode *, const TSInputEdit *); ``` -This `ts_node_edit` function is *only* needed in the case where you have retrieved `TSNode` instances *before* editing the tree, and then *after* editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. +This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the tree, and then _after_ editing the tree, you want to continue to use those specific node instances. Often, you'll just want to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. ### Multi-language Documents Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. -Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain *ranges* of a file. +Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. ```c typedef struct { @@ -409,13 +402,13 @@ Tree-sitter supports multi-threaded use cases by making syntax trees very cheap TSTree *ts_tree_copy(const TSTree *); ``` -Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are *not* thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. +Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a different thread. Note that individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. ## Other Tree Operations ### Walking Trees with Tree Cursors -You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a *tree cursor*. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. +You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. You can initialize a cursor from any node: @@ -441,19 +434,19 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); ``` -### Pattern Matching with Queries +## Pattern Matching with Queries Many code analysis tasks involve searching for patterns in syntax trees. Tree-sitter provides a small declarative language for expressing these patterns and searching for matches. The language is similar to the format of Tree-sitter's [unit test system](./creating-parsers#command-test). 
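+Before diving into the pattern syntax, it can help to see how a query is driven from C end-to-end. The following is a minimal sketch, assuming a `language` and a parsed tree's `root_node` obtained as in the earlier examples; it uses the functions covered in [The Query API](#the-query-api) below:
+
+```c
+// Minimal sketch: compile a query, run it over a tree, and visit each match.
+// Assumes `language` (a TSLanguage *) and `root_node` (the tree's root TSNode)
+// were obtained as shown earlier; error handling is omitted.
+const char *pattern = "(binary_expression (number_literal) (number_literal))";
+uint32_t error_offset;
+TSQueryError error_type;
+TSQuery *query = ts_query_new(
+  language, pattern, (uint32_t)strlen(pattern), &error_offset, &error_type
+);
+
+TSQueryCursor *cursor = ts_query_cursor_new();
+ts_query_cursor_exec(cursor, query, root_node);
+
+TSQueryMatch match;
+while (ts_query_cursor_next_match(cursor, &match)) {
+  for (uint16_t i = 0; i < match.capture_count; i++) {
+    TSNode node = match.captures[i].node;
+    printf("matched node from byte %u to %u\n",
+           ts_node_start_byte(node), ts_node_end_byte(node));
+  }
+}
+
+ts_query_cursor_delete(cursor);
+ts_query_delete(query);
+```
+
+Each match reports which pattern matched and which nodes were captured; the rest of this section describes how to write the patterns themselves.
+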
-#### Basics
+### Query Syntax

-A *query* consists of one or more *patterns*, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes:
+A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes:

 ```
 (binary_expression (number_literal) (number_literal))
 ```

-Children can also be omitted. For example, this would match any `binary_expression` where at least *one* child is a `string_literal` node:
+Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ child is a `string_literal` node:

 ```
 (binary_expression (string_literal))
@@ -481,13 +474,13 @@ The parenthesized syntax for writing nodes only applies to [named nodes](#named-

 #### Capturing Nodes

-When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written *after* the nodes that they refer to, and start with an `@` character.
+When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_ the nodes that they refer to, and start with an `@` character.

-For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `function-definition` with the identifier:
+For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier:

 ```
 (assignment_expression
-  left: (identifier) @function-definition
+  left: (identifier) @the-function-name
   right: (function))
 ```

@@ -501,29 +494,156 @@ And this pattern would match all method definitions, associating the name `the-m
     name: (property_identifier) @the-method-name)))
 ```

+#### Quantification Operators
+
+You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously to the `+` and `*` operators [in regular expressions](https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts). The `+` operator matches _one or more_ repetitions of a pattern, and the `*` operator matches _zero or more_.
+
+For example, this pattern would match a sequence of one or more comments:
+
+```
+(comment)+
+```
+
+This pattern would match a class declaration, capturing all of the decorators if any were present:
+
+```
+(class_declaration
+  (decorator)* @the-decorator
+  name: (identifier) @the-name)
+```
+
+You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present:
+
+```
+(call_expression
+  function: (identifier) @the-function
+  arguments: (arguments (string)? @the-string-arg))
+```
+
+#### Grouping Sibling Nodes
+
+You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration:
+
+```
+(
+  (comment)
+  (function_declaration)
+)
+```
+
+Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers:
+
+```
+(
+  (number)
+  ("," (number))*
+)
+```
+
+#### Alternations
+
+An alternation is written as a pair of square brackets (`[]`) containing a list of alternative patterns.
+This is similar to _character classes_ from regular expressions (`[abc]` matches either a, b, or c).
+
+For example, this pattern would match a call to either a variable or an object property.
+In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`:
+
+```
+(call_expression
+  function: [
+    (identifier) @function
+    (member_expression
+      property: (property_identifier) @method)
+  ])
+```
+
+This pattern would match a set of possible keyword tokens, capturing them as `@keyword`:
+
+```
+[
+  "break"
+  "catch"
+  "delete"
+  "else"
+  "for"
+  "function"
+  "if"
+  "return"
+  "try"
+  "while"
+] @keyword
+```
+
+#### Wildcard Node
+
+A wildcard node is represented with an underscore (`(_)`); it matches any node.
+This is similar to `.` in regular expressions.
+
+For example, this pattern would match any node inside a call:
+
+```
+(call (_) @call.inner)
+```
+
+#### Anchors
+
+The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors depending on where it's placed inside a query.
+
+When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`:
+
+```
+(array . (identifier) @the-element)
+```
+
+Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound to each matched identifier.
+
+Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`.
+
+```
+(block (_) @last-expression .)
+```
+
+Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`.
+
+```
+(dotted_name
+  (identifier) @prev-id
+  .
+  (identifier) @next-id)
+```
+
+Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched.
+
+The restrictions placed on a pattern by an anchor operator ignore anonymous nodes.
+
 #### Predicates

-You can also specify other conditions that should restrict the nodes that match a given pattern. You do this by enclosing the pattern in an additional pair of parentheses, and specifying one or more *predicate* S-expressions after your main pattern. Predicate S-expressions must start with a predicate name, and contain either `@`-prefixed capture names or strings.
+You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings.

 For example, this pattern would match any identifier whose name is written in `SCREAMING_SNAKE_CASE`:

 ```
-((identifier) @constant
-  (match? @constant "^[A-Z][A-Z_]+"))
+(
+  (identifier) @constant
+  (#match? @constant "^[A-Z][A-Z_]+")
+)
 ```

 And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key:

 ```
-((pair
-  key: (property_identifier) @key-name
-  value: (identifier) @value-name)
-  (eq? @key-name @value-name))
+(
+  (pair
+    key: (property_identifier) @key-name
+    value: (identifier) @value-name)
+  (#eq? @key-name @value-name)
+)
 ```

-*Note* - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `eq?` and `match?`.
+_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`.

-#### The Query API
+### The Query API

 Create a query by specifying a string containing one or more patterns:

@@ -583,7 +703,7 @@ This function will return `false` when there are no more matches. Otherwise, it

 ## Static Node Types

-In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This *node types* file provides structured data about every possible syntax node in a grammar.
+In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_ file provides structured data about every possible syntax node in a grammar.

 You can use this data to generate type declarations in statically-typed programming languages. For example, GitHub's [Semantic](https://github.com/github/semantic) uses these node types files to [generate Haskell data types](https://github.com/github/semantic/tree/master/semantic-ast) for every possible syntax node, which allows for code analysis algorithms to be structurally verified by the Haskell type system.

@@ -593,9 +713,8 @@ The node types file contains an array of objects, each of which describes a part

 Every object in this array has these two entries:

-* `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes).
-* `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info.
-
+- `"type"` - A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function described [above](#syntax-nodes).
+- `"named"` - A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string literal. See [above](#named-vs-anonymous-nodes) for more info.

 Examples:

@@ -614,16 +733,16 @@ Together, these two fields constitute a unique identifier for a node type; no tw

 #### Internal Nodes

-Many syntax nodes can have *children*. The node type object describes the possible children that a node can have using the following entries:
+Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the following entries:

-* `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are *child type* objects, described below.
-* `"children"` - Another *child type* object that describes all of the node's possible *named* children *without* fields.
+- `"fields"` - An object that describes the possible [fields](#node-field-names) that the node can have. The keys of this object are field names, and the values are _child type_ objects, described below.
+- `"children"` - Another _child type_ object that describes all of the node's possible _named_ children _without_ fields.

-A *child type* object describes a set of child nodes using the following entries:
+A _child type_ object describes a set of child nodes using the following entries:

-* `"required"` - A boolean indicating whether there is always *at least one* node in this set.
-* `"multiple"` - A boolean indicating whether there can be *multiple* nodes in this set.
-* `"types"` - An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above.
+- `"required"` - A boolean indicating whether there is always _at least one_ node in this set.
+- `"multiple"` - A boolean indicating whether there can be _multiple_ nodes in this set.
+- `"types"` - An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"` and `"named"`, whose meanings are described above.
Example with fields: @@ -635,31 +754,25 @@ Example with fields: "body": { "multiple": false, "required": true, - "types": [ - {"type": "statement_block", "named": true} - ] + "types": [{ "type": "statement_block", "named": true }] }, "decorator": { "multiple": true, "required": false, - "types": [ - {"type": "decorator", "named": true} - ] + "types": [{ "type": "decorator", "named": true }] }, "name": { "multiple": false, "required": true, "types": [ - {"type": "computed_property_name", "named": true}, - {"type": "property_identifier", "named": true}, + { "type": "computed_property_name", "named": true }, + { "type": "property_identifier", "named": true } ] }, "parameters": { "multiple": false, "required": true, - "types": [ - {"type": "formal_parameters", "named": true} - ] + "types": [{ "type": "formal_parameters", "named": true }] } } } @@ -676,8 +789,8 @@ Example with children: "multiple": true, "required": false, "types": [ - {"type": "_expression", "named": true}, - {"type": "spread_element", "named": true} + { "type": "_expression", "named": true }, + { "type": "spread_element", "named": true } ] } } @@ -685,11 +798,11 @@ Example with children: #### Supertype Nodes -In Tree-sitter grammars, there are usually certain rules that represent abstract *categories* of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. +In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression", "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules](./creating-parsers#hiding-rules) whose definition is a simple [`choice`](./creating-parsers#the-grammar-dsl) where each member is just a single symbol. -Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it *will* show up in the node types file, with the following special entry: +Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add a hidden rule to the grammar's [`supertypes` list](./creating-parsers#the-grammar-dsl), then it _will_ show up in the node types file, with the following special entry: -* `"subtypes"` - An array of objects that specify the *types* of nodes that this 'supertype' node can wrap. +- `"subtypes"` - An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. 
Example: @@ -698,11 +811,11 @@ Example: "type": "_declaration", "named": true, "subtypes": [ - {"type": "class_declaration", "named": true}, - {"type": "function_declaration", "named": true}, - {"type": "generator_function_declaration", "named": true}, - {"type": "lexical_declaration", "named": true}, - {"type": "variable_declaration", "named": true} + { "type": "class_declaration", "named": true }, + { "type": "function_declaration", "named": true }, + { "type": "generator_function_declaration", "named": true }, + { "type": "lexical_declaration", "named": true }, + { "type": "variable_declaration", "named": true } ] } ``` @@ -719,17 +832,13 @@ Example: "declaration": { "multiple": false, "required": false, - "types": [ - {"type": "_declaration", "named": true} - ] + "types": [{ "type": "_declaration", "named": true }] }, "source": { "multiple": false, "required": false, - "types": [ - {"type": "string", "named": true} - ] - }, + "types": [{ "type": "string", "named": true }] + } } } ``` diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index dc7285f544..3fc8f04aa5 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -13,12 +13,12 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo In order to develop a Tree-sitter parser, there are two dependencies that you need to install: -* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. It shouldn't matter what version of Node you have. +* **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater. * **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform. ### Installation -To create a Tree-sitter parser, you need to use the [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: +To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: * Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This is the recommended approach, and it is discussed further in the next section. * Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`. @@ -66,7 +66,7 @@ module.exports = grammar({ }); ``` -Then run the the following command: +Then run the following command: ```sh tree-sitter generate @@ -152,7 +152,7 @@ func x() int { These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. -By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the the `-f` flag: +By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. 
To run a particular test, you can use the `-f` flag: ```sh tree-sitter test -f 'Return statements' @@ -184,10 +184,10 @@ You can run your parser on an arbitrary file using `tree-sitter parse`. This wil (int_literal [1, 9] - [1, 10])))))) ``` -You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: +You can pass any number of file paths and glob patterns to `tree-sitter parse`, and it will parse all of the given files. The command will exit with a non-zero status code if any parse errors occurred. You can also prevent the syntax trees from being printed using the `--quiet` flag. Additionally, the `--stat` flag prints out aggregated parse success/failure information for all processed files. This makes `tree-sitter parse` usable as a secondary testing strategy: you can check that a large number of files parse without error: ```sh -tree-sitter parse 'examples/**/*.go' --quiet +tree-sitter parse 'examples/**/*.go' --quiet --stat ``` ### Command: `highlight` @@ -204,12 +204,13 @@ The following is a complete list of built-in functions you can use in your `gram * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation. * **Repetitions : `repeat(rule)`** - This function creates a rule that matches *zero-or-more* occurrences of a given rule. It is analogous to the `{x}` (curly brace) syntax in EBNF notation. * **Repetitions : `repeat1(rule)`** - This function creates a rule that matches *one-or-more* occurrences of a given rule. The previous `repeat` rule is implemented in terms of `repeat1` but is included because it is very commonly used. -* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule it is analogous to the `[x]` (square bracket) syntax in EBNF notation. +* **Options : `optional(rule)`** - This function creates a rule that matches *zero or one* occurrence of a given rule. It is analogous to the `[x]` (square bracket) syntax in EBNF notation. * **Precedence : `prec(number, rule)`** - This function marks the given rule with a numerical precedence which will be used to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the [precedence directives][yacc-prec] in Yacc grammars. * **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars. 
* **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. -* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. +* **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. * **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. +* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. @@ -344,7 +345,7 @@ Imagine that you were just starting work on the [Tree-sitter JavaScript parser][ return x + y; ``` -According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules: +According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. 
The relationship between these constructs is captured by a complex series of production rules:

 ```
 ReturnStatement -> 'return' Expression
@@ -505,6 +506,8 @@ Grammars often contain multiple tokens that can match the same characters. For e

 4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`.

+5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar.
+
 ### Keywords

 Many languages have a set of *keyword* tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is used as a binary operator, like this:

diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md
index 85ccfb6242..cbf97b664e 100644
--- a/docs/section-4-syntax-highlighting.md
+++ b/docs/section-4-syntax-highlighting.md
@@ -224,7 +224,7 @@ The capture names are as follows:

 When highlighting a file, Tree-sitter will keep track of the set of scopes that contains any given position, and the set of definitions within each scope. When processing a syntax node that is captured as a `local.reference`, Tree-sitter will try to find a definition for a name that matches the node's text. If it finds a match, Tree-sitter will ensure that the *reference* and the *definition* are colored the same.

-The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes which have been identified as local variables by adding the predicate `(is-not? local)` to the pattern. This is used in the example below:
+The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes which have been identified as local variables by adding the predicate `(#is-not? local)` to the pattern. This is used in the example below:

 #### Example Input

@@ -299,7 +299,7 @@ Let's write some queries that let us clearly distinguish between these types of
 (block_parameters (identifier) @variable.parameter)

 ((identifier) @function.method
-  (is-not? local))
+  (#is-not? local))
 ```

 Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*:

@@ -385,6 +385,14 @@ The following query would specify that the contents of the heredoc should be par
   (heredoc_end) @injection.language) @injection.content
 ```

+You can also force the language using the `#set!` predicate.
+For example, this will force the language to always be `ruby`.
+
+```
+((heredoc_body) @injection.content
+  (#set! injection.language "ruby"))
+```
+
 ## Unit Testing

 Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system](https://www.sublimetext.com/docs/3/syntax.html#testing) for testing highlighting.

diff --git a/docs/section-6-contributing.md b/docs/section-6-contributing.md
index 7e11dc00c4..4ccaddea30 100644
--- a/docs/section-6-contributing.md
+++ b/docs/section-6-contributing.md
@@ -18,7 +18,7 @@ To make changes to Tree-sitter, you should have:

 1.
A C compiler, for compiling the core library and the generated parsers. 2. A [Rust toolchain](https://rustup.rs/), for compiling the Rust bindings, the highlighting library, and the CLI. 3. Node.js and NPM, for generating parsers from `grammar.js` files. -4. Either [Docker](https://www.docker.com/) or [Emscripten](https://emscripten.org/), for compiling the library to WASM. +4. Either [Emscripten](https://emscripten.org/) or [Docker](https://www.docker.com/), for compiling the library to WASM. ### Building @@ -29,7 +29,7 @@ git clone https://github.com/tree-sitter/tree-sitter cd tree-sitter ``` -Build the WASM library. We do this first because it gets embedded in the CLI to enable the `web-ui` command. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker: +Optionally, build the WASM library. If you skip this step, then the `tree-sitter web-ui` command will require an internet connection. If you have emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker: ```sh ./script/build-wasm @@ -86,7 +86,7 @@ You can run the tests under the debugger (either `lldb` or `gdb`) using the `-g` script/test test_does_something -g ``` -Part of the Tree-sitter test suite involves parsing the *corpus* tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular *language*, you can pass the `-l` flag. And if you want to run a particular *example* from the corpus, you can pass the `-e` flag: +Part of the Tree-sitter test suite involves parsing the _corpus_ tests for several different languages and performing randomized edits to each example in the corpus. If you just want to run the tests for a particular _language_, you can pass the `-l` flag. 
And if you want to run a particular _example_ from the corpus, you can pass the `-e` flag:

 ```sh
 script/test -l javascript -e Arrays
 ```

@@ -96,18 +96,18 @@ script/test -l javascript -e Arrays

 The main [`tree-sitter/tree-sitter`](https://github.com/tree-sitter/tree-sitter) repository contains the source code for several packages that are published to package registries for different languages:

-* Rust crates on [crates.io](https://crates.io):
-  * [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
-  * [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
-  * [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
-* JavaScript modules on [npmjs.com](https://npmjs.com):
-  * [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
-  * [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool
+- Rust crates on [crates.io](https://crates.io):
+  - [`tree-sitter`](https://crates.io/crates/tree-sitter) - A Rust binding to the core library
+  - [`tree-sitter-highlight`](https://crates.io/crates/tree-sitter-highlight) - The syntax-highlighting library
+  - [`tree-sitter-cli`](https://crates.io/crates/tree-sitter-cli) - The command-line tool
+- JavaScript modules on [npmjs.com](https://npmjs.com):
+  - [`web-tree-sitter`](https://www.npmjs.com/package/web-tree-sitter) - A WASM-based JavaScript binding to the core library
+  - [`tree-sitter-cli`](https://www.npmjs.com/package/tree-sitter-cli) - The command-line tool

 There are also several other dependent repositories that contain other published packages:

-* [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
-* [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).
+- [`tree-sitter/node-tree-sitter`](https://github.com/tree-sitter/node-tree-sitter) - Node.js bindings to the core library, published as [`tree-sitter`](https://www.npmjs.com/package/tree-sitter) on npmjs.com
+- [`tree-sitter/py-tree-sitter`](https://github.com/tree-sitter/py-tree-sitter) - Python bindings to the core library, published as [`tree-sitter`](https://pypi.org/project/tree-sitter) on [PyPI.org](https://pypi.org).

 ## Publishing New Releases

@@ -115,31 +115,31 @@ Publishing a new release of the CLI requires these steps:

 1. Commit and push all outstanding changes and verify that CI passes:

-   ```sh
-   git commit -m "Fix things"
-   git push
-   ```
+    ```sh
+    git commit -m "Fix things"
+    git push
+    ```

 2. Create a new tag:

-   ```sh
-   script/version patch
-   ```
+    ```sh
+    script/version patch
+    ```

-   This will determine the current version, increment the *patch* version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command.
+    This will determine the current version, increment the _patch_ version number, and update the `Cargo.toml` and `package.json` files for the Rust and Node CLI packages. It will then create a commit and a tag for the new version. For more information about the arguments that are allowed, see the documentation for the [`npm version`](https://docs.npmjs.com/cli/version) command.
 3. Push the commit and the tag:

-   ```sh
-   git push
-   git push --tags
-   ```
+    ```sh
+    git push
+    git push --tags
+    ```

 4. Wait for CI to pass. Because of the git tag, the CI jobs will publish artifacts to [a GitHub release](https://github.com/tree-sitter/tree-sitter/releases). The npm module of `tree-sitter-cli` works by downloading the appropriate binary from the corresponding GitHub release during installation. So it's best not to publish the npm package until the binaries are uploaded.

 5. Publish the npm package:

-   ```sh
-   cd cli/npm
-   npm publish
-   ```
+    ```sh
+    cd cli/npm
+    npm publish
+    ```

diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml
index 94a4e03290..7f8fc04aa2 100644
--- a/highlight/Cargo.toml
+++ b/highlight/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tree-sitter-highlight"
 description = "Library for performing syntax highlighting with Tree-sitter"
-version = "0.1.6"
+version = "0.3.0"
 authors = [
   "Max Brunsfeld <maxbrunsfeld@gmail.com>",
   "Tim Clem <timothy.clem@gmail.com>"
 ]

diff --git a/highlight/README.md b/highlight/README.md
index ae462f9d8b..07edc421c0 100644
--- a/highlight/README.md
+++ b/highlight/README.md
@@ -17,7 +17,7 @@ extern "C" tree_sitter_javascript();

 Define the list of highlight names that you will recognize:

 ```rust
-let highlight_names = [
+let highlight_names: Vec<String> = [
     "attribute",
     "constant",
     "function.builtin",
@@ -93,14 +93,14 @@ let highlights = highlighter.highlight(
 ).unwrap();

 for event in highlights {
-    match event? {
+    match event.unwrap() {
         HighlightEvent::Source {start, end} => {
             eprintln!("source: {}-{}", start, end);
         },
-        HighlightEvent::HighlightStart(s) {
+        HighlightEvent::HighlightStart(s) => {
             eprintln!("highlight style started: {:?}", s);
         },
-        HighlightEvent::HighlightEnd {
+        HighlightEvent::HighlightEnd => {
             eprintln!("highlight style ended");
         },
     }

diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs
index 6f1b7bbdbf..0f48847beb 100644
--- a/highlight/src/lib.rs
+++ b/highlight/src/lib.rs
@@ -5,11 +5,13 @@ pub use c_lib as c;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::{iter, mem, ops, str, usize};
 use tree_sitter::{
-    Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch,
-    Range, Tree,
+    Language, LossyUtf8, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
+    QueryMatch, Range, Tree,
 };

 const CANCELLATION_CHECK_INTERVAL: usize = 100;
+const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
+const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;

 /// Indicates which highlight should be applied to a region of source code.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -620,7 +622,7 @@ where
     type Item = Result<HighlightEvent, Error>;

     fn next(&mut self) -> Option<Self::Item> {
-        loop {
+        'main: loop {
             // If we've already determined the next highlight boundary, just return it.
             if let Some(e) = self.next_event.take() {
                 return Some(Ok(e));
@@ -640,29 +642,34 @@ where

             // If none of the layers have any more highlight boundaries, terminate.
             if self.layers.is_empty() {
-                if self.byte_offset < self.source.len() {
+                return if self.byte_offset < self.source.len() {
                     let result = Some(Ok(HighlightEvent::Source {
                         start: self.byte_offset,
                         end: self.source.len(),
                     }));
                     self.byte_offset = self.source.len();
-                    return result;
+                    result
                 } else {
-                    return None;
-                }
+                    None
+                };
             }

             // Get the next capture from whichever layer has the earliest highlight boundary.
- let match_; - let mut captures; - let mut capture; - let mut pattern_index; + let range; let layer = &mut self.layers[0]; - if let Some((m, capture_index)) = layer.captures.peek() { - match_ = m; - captures = match_.captures; - pattern_index = match_.pattern_index; - capture = captures[*capture_index]; + if let Some((next_match, capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } } // If there are no more captures, then emit any remaining highlight end events. // And if there are none of those, then just advance to the end of the document. @@ -673,30 +680,17 @@ where return self.emit_event(self.source.len(), None); }; - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - let range = capture.node.byte_range(); - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } + let (mut match_, capture_index) = layer.captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; // If this capture represents an injection, then process the injection. - if pattern_index < layer.config.locals_pattern_index { + if match_.pattern_index < layer.config.locals_pattern_index { let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, match_, &self.source); + injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. The `unwrap` is ok because - layer.captures.next().unwrap().0.remove(); + // in the stream of captures. + match_.remove(); // If a language is found with the given name, then add a new language layer // to the highlighted document. @@ -729,16 +723,19 @@ where } self.sort_layers(); - continue; + continue 'main; } - layer.captures.next(); + // Remove from the local scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } // If this capture is for tracking local variables, then process the // local variable info. let mut reference_highlight = None; let mut definition_highlight = None; - while pattern_index < layer.config.highlights_pattern_index { + while match_.pattern_index < layer.config.highlights_pattern_index { // If the node represents a local scope, push a new local scope onto // the scope stack. 
if Some(capture.index) == layer.config.local_scope_capture_index { @@ -748,7 +745,7 @@ where range: range.clone(), local_defs: Vec::new(), }; - for prop in layer.config.query.property_settings(pattern_index) { + for prop in layer.config.query.property_settings(match_.pattern_index) { match prop.key.as_ref() { "local.scope-inherits" => { scope.inherits = @@ -767,7 +764,7 @@ where let scope = layer.scope_stack.last_mut().unwrap(); let mut value_range = 0..0; - for capture in captures { + for capture in match_.captures { if Some(capture.index) == layer.config.local_def_value_capture_index { value_range = capture.node.byte_range(); } @@ -810,84 +807,76 @@ where } } - // Continue processing any additional local-variable-tracking patterns - // for the same node. + // Continue processing any additional matches for the same node. if let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { - pattern_index = next_match.pattern_index; - captures = next_match.captures; capture = next_capture; - layer.captures.next(); + match_ = layer.captures.next().unwrap().0; continue; - } else { - break; } } - break; + self.sort_layers(); + continue 'main; } // Otherwise, this capture must represent a highlight. - let mut has_highlight = true; - // If this exact range has already been highlighted by an earlier pattern, or by // a different layer, then skip over this one. if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { if range.start == last_start && range.end == last_end && layer.depth < last_depth { - has_highlight = false; + self.sort_layers(); + continue 'main; } } // If the current node was found to be a local variable, then skip over any // highlighting patterns that are disabled for local variables. - while has_highlight - && (definition_highlight.is_some() || reference_highlight.is_some()) - && layer.config.non_local_variable_patterns[pattern_index] - { - has_highlight = false; - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - has_highlight = true; - pattern_index = next_match.pattern_index; - layer.captures.next(); - continue; + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } } + + self.sort_layers(); + continue 'main; } - break; } - if has_highlight { - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = layer.captures.peek() { - if next_match.captures[*next_capture_index].node == capture.node { - layer.captures.next(); - } else { - break; - } + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. 
Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + layer.captures.next(); + } else { + break; } + } - let current_highlight = layer.config.highlight_indices[capture.index as usize]; + let current_highlight = layer.config.highlight_indices[capture.index as usize]; - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } + // Emit a scope start event and push the node's end position to the stack. + if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); } self.sort_layers(); @@ -897,11 +886,13 @@ where impl HtmlRenderer { pub fn new() -> Self { - HtmlRenderer { - html: Vec::new(), - line_offsets: vec![0], + let mut result = HtmlRenderer { + html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY), + line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY), carriage_return_highlight: None, - } + }; + result.line_offsets.push(0); + result } pub fn set_carriage_return_highlight(&mut self, highlight: Option) { @@ -909,8 +900,8 @@ impl HtmlRenderer { } pub fn reset(&mut self) { - self.html.clear(); - self.line_offsets.clear(); + shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY); + shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY); self.line_offsets.push(0); } @@ -1000,7 +991,7 @@ impl HtmlRenderer { F: Fn(Highlight) -> &'a [u8], { let mut last_char_was_cr = false; - for c in util::LossyUtf8::new(src).flat_map(|p| p.bytes()) { + for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) { // Don't render carriage return characters, but allow lone carriage returns (not // followed by line feeds) to be styled via the attribute callback. if c == b'\r' { @@ -1055,7 +1046,7 @@ fn injection_for_match<'a>( for prop in query.property_settings(query_match.pattern_index) { match prop.key.as_ref() { // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `set!` predicate + // captured node, it can also be hard-coded via a `#set!` predicate // that sets the injection.language key. 
"injection.language" => { if language_name.is_none() { @@ -1065,7 +1056,7 @@ fn injection_for_match<'a>( // By default, injections do not include the *children* of an // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `set!` predicate that + // node itself. This can be changed using a `#set!` predicate that // sets the `injection.include-children` key. "injection.include-children" => include_children = true, _ => {} @@ -1074,3 +1065,11 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} diff --git a/highlight/src/util.rs b/highlight/src/util.rs index 6c325a6cf9..29adb13b11 100644 --- a/highlight/src/util.rs +++ b/highlight/src/util.rs @@ -1,56 +1,3 @@ -use std::str; - -pub struct LossyUtf8<'a> { - bytes: &'a [u8], - in_replacement: bool, -} - -impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { - LossyUtf8 { - bytes, - in_replacement: false, - } - } -} - -impl<'a> Iterator for LossyUtf8<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - if self.bytes.is_empty() { - return None; - } - if self.in_replacement { - self.in_replacement = false; - return Some("\u{fffd}"); - } - match str::from_utf8(self.bytes) { - Ok(valid) => { - self.bytes = &[]; - Some(valid) - } - Err(error) => { - if let Some(error_len) = error.error_len() { - let error_start = error.valid_up_to(); - if error_start > 0 { - let result = - unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) }; - self.bytes = &self.bytes[(error_start + error_len)..]; - self.in_replacement = true; - Some(result) - } else { - self.bytes = &self.bytes[error_len..]; - Some("\u{fffd}") - } - } else { - None - } - } - } - } -} - pub fn html_escape(c: u8) -> Option<&'static [u8]> { match c as char { '>' => Some(b">"), diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 788294205a..e8305c0e82 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.6.3" +version = "0.17.1" authors = ["Max Brunsfeld "] license = "MIT" readme = "binding_rust/README.md" @@ -15,14 +15,16 @@ include = [ "/binding_rust/*", "/Cargo.toml", "/include/*", - "/src/*", + "/src/*.h", + "/src/*.c", + "/src/unicode/*", ] [dependencies] regex = "1" [build-dependencies] -cc = "1.0" +cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" diff --git a/lib/binding_rust/README.md b/lib/binding_rust/README.md index 0ee4ba3f97..e85f45f356 100644 --- a/lib/binding_rust/README.md +++ b/lib/binding_rust/README.md @@ -1,5 +1,4 @@ -Rust Tree-sitter -================ +# Rust Tree-sitter [![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter) [![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master) @@ -14,15 +13,12 @@ First, create a parser: ```rust use tree_sitter::{Parser, Language}; -// ... - let mut parser = Parser::new(); ``` Tree-sitter languages consist of generated C code. 
-```rust
-extern crate cc;

+```rust
 use std::path::PathBuf;

 fn main() {
@@ -37,12 +33,13 @@ fn main() {
 ```

 Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
+
 ```toml
 [build-dependencies]
 cc="*"
 ```

-To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`.  Then you can assign them to the parser.
+To then use languages from Rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser.

 ```rust
 extern "C" { fn tree_sitter_c() -> Language; }

diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs
index cba87fa312..f28d346111 100644
--- a/lib/binding_rust/bindings.rs
+++ b/lib/binding_rust/bindings.rs
@@ -132,6 +132,7 @@ pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1;
 pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2;
 pub const TSQueryError_TSQueryErrorField: TSQueryError = 3;
 pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4;
+pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5;
 pub type TSQueryError = u32;

 extern "C" {
     #[doc = " Create a new parser."]
@@ -172,9 +173,9 @@ extern "C" {
     #[doc = " the given ranges must be ordered from earliest to latest in the document,"]
     #[doc = " and they must not overlap. That is, the following must hold for all"]
     #[doc = " `i` < `length - 1`:"]
-    #[doc = " ```text"]
+    #[doc = ""]
     #[doc = " ranges[i].end_byte <= ranges[i + 1].start_byte"]
-    #[doc = " ```"]
+    #[doc = ""]
     #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"]
     #[doc = " will not be assigned, and this function will return `false`. On success,"]
     #[doc = " this function returns `true`"]
@@ -649,6 +650,9 @@ extern "C" {
         length: *mut u32,
     ) -> *const TSQueryPredicateStep;
 }
+extern "C" {
+    pub fn ts_query_step_is_definite(self_: *const TSQuery, byte_offset: u32) -> bool;
+}
 extern "C" {
     #[doc = " Get the name and length of one of the query\'s captures, or one of the"]
     #[doc = " query\'s string literals. Each capture and string is associated with a"]
@@ -800,5 +804,5 @@ extern "C" {
     pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
 }

-pub const TREE_SITTER_LANGUAGE_VERSION: usize = 11;
+pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12;
 pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9;

diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs
index caf5fa8e74..0ec7a4ad6e 100644
--- a/lib/binding_rust/build.rs
+++ b/lib/binding_rust/build.rs
@@ -21,8 +21,8 @@ fn main() {

     let mut config = cc::Build::new();

-    println!("cargo:rerun-if-env-changed=DEBUG");
-    if env::var("DEBUG").map(|s| s == "true").unwrap_or(false) {
+    println!("cargo:rerun-if-env-changed=PROFILE");
+    if env::var("PROFILE").map_or(false, |s| s == "debug") {
         config.define("TREE_SITTER_TEST", "");
     }

diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs
index a13d9168a7..0b0097f93e 100644
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@@ -138,7 +138,7 @@ pub struct QueryCaptures<'a, T: AsRef<[u8]>> {
 }

 /// A particular `Node` that has been captured with a particular name within a `Query`.
-#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[repr(C)] pub struct QueryCapture<'a> { pub node: Node<'a>, @@ -157,19 +157,36 @@ pub struct IncludedRangesError(pub usize); /// An error that occurred when trying to create a `Query`. #[derive(Debug, PartialEq, Eq)] -pub enum QueryError { - Syntax(usize, String), - NodeType(usize, String), - Field(usize, String), - Capture(usize, String), - Predicate(String), +pub struct QueryError { + pub row: usize, + pub column: usize, + pub offset: usize, + pub message: String, + pub kind: QueryErrorKind, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum QueryErrorKind { + Syntax, + NodeType, + Field, + Capture, + Predicate, + Structure, } #[derive(Debug)] enum TextPredicate { CaptureEqString(u32, String, bool), CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex), + CaptureMatchString(u32, regex::bytes::Regex, bool), +} + +// TODO: Remove this struct at some point, if `core::str::lossy::Utf8Lossy` +// is ever stabilized. +pub struct LossyUtf8<'a> { + bytes: &'a [u8], + in_replacement: bool, } impl Language { @@ -1164,39 +1181,59 @@ impl Query { let offset = error_offset as usize; let mut line_start = 0; let mut row = 0; - let line_containing_error = source.split("\n").find_map(|line| { - row += 1; + let mut line_containing_error = None; + for line in source.split("\n") { let line_end = line_start + line.len() + 1; if line_end > offset { - Some(line) - } else { - line_start = line_end; - None + line_containing_error = Some(line); + break; + } + line_start = line_end; + row += 1; + } + let column = offset - line_start; + + let kind; + let message; + match error_type { + // Error types that report names + ffi::TSQueryError_TSQueryErrorNodeType + | ffi::TSQueryError_TSQueryErrorField + | ffi::TSQueryError_TSQueryErrorCapture => { + let suffix = source.split_at(offset).1; + let end_offset = suffix + .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') + .unwrap_or(source.len()); + message = suffix.split_at(end_offset).0.to_string(); + kind = match error_type { + ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, + ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, + ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, + _ => unreachable!(), + }; } - }); - - let message = if let Some(line) = line_containing_error { - line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" - } else { - "Unexpected EOF".to_string() - }; - // if line_containing_error - return if error_type != ffi::TSQueryError_TSQueryErrorSyntax { - let suffix = source.split_at(offset).1; - let end_offset = suffix - .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') - .unwrap_or(source.len()); - let name = suffix.split_at(end_offset).0.to_string(); - match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => Err(QueryError::NodeType(row, name)), - ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(row, name)), - ffi::TSQueryError_TSQueryErrorCapture => Err(QueryError::Capture(row, name)), - _ => Err(QueryError::Syntax(row, message)), + // Error types that report positions + _ => { + message = if let Some(line) = line_containing_error { + line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" + } else { + "Unexpected EOF".to_string() + }; + kind = match error_type { + ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, + _ => QueryErrorKind::Syntax, + }; } - } else { - Err(QueryError::Syntax(row, message)) }; + + return Err(QueryError { + row,
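// `row` above and `column` below are zero-based positions derived from
// `offset`; `message` carries either the offending name (NodeType/Field/
// Capture errors) or a caret-annotated copy of the offending line
// (Syntax/Structure errors).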
column, + offset, + kind, + message, + }); } let string_count = unsafe { ffi::ts_query_string_count(ptr) }; @@ -1242,9 +1279,20 @@ impl Query { let mut length = 0u32; let raw_predicates = ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); - slice::from_raw_parts(raw_predicates, length as usize) + if length > 0 { + slice::from_raw_parts(raw_predicates, length as usize) + } else { + &[] + } }; + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let row = source + .char_indices() + .take_while(|(i, _)| *i < byte_offset as usize) + .filter(|(_, c)| *c == '\n') + .count(); + let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; @@ -1259,10 +1307,13 @@ impl Query { } if p[0].type_ != type_string { - return Err(QueryError::Predicate(format!( - "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], - ))); + return Err(predicate_error( + row, + format!( + "Expected predicate to start with a function name. Got @{}.", + result.capture_names[p[0].value_id as usize], + ), + )); } // Build a predicate for each of the known predicate function names. @@ -1270,14 +1321,17 @@ impl Query { match operator_name.as_str() { "eq?" | "not-eq?" => { if p.len() != 3 { - return Err(QueryError::Predicate(format!( - "Wrong number of arguments to eq? predicate. Expected 2, got {}.", + return Err(predicate_error( + row, + format!( + "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", p.len() - 1 - ))); + ), + )); } if p[1].type_ != type_capture { - return Err(QueryError::Predicate(format!( - "First argument to eq? predicate must be a capture name. Got literal \"{}\".", + return Err(predicate_error(row, format!( + "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } @@ -1298,37 +1352,40 @@ impl Query { }); } - "match?" => { + "match?" | "not-match?" => { if p.len() != 3 { - return Err(QueryError::Predicate(format!( - "Wrong number of arguments to match? predicate. Expected 2, got {}.", + return Err(predicate_error(row, format!( + "Wrong number of arguments to #match? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { - return Err(QueryError::Predicate(format!( - "First argument to match? predicate must be a capture name. Got literal \"{}\".", + return Err(predicate_error(row, format!( + "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } if p[2].type_ == type_capture { - return Err(QueryError::Predicate(format!( - "Second argument to match? predicate must be a literal. Got capture @{}.", + return Err(predicate_error(row, format!( + "Second argument to #match? predicate must be a literal. Got capture @{}.", result.capture_names[p[2].value_id as usize], ))); } + let is_positive = operator_name == "match?"; let regex = &string_values[p[2].value_id as usize]; text_predicates.push(TextPredicate::CaptureMatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { - QueryError::Predicate(format!("Invalid regex '{}'", regex)) + predicate_error(row, format!("Invalid regex '{}'", regex)) })?, + is_positive, )); } "set!" 
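// `#set!` attaches key/value metadata to a pattern (for example the
// `(#set! injection.include-children)` property consumed by the highlight
// crate above); each use is parsed into a `QueryProperty` by
// `parse_property` below.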
=> property_settings.push(Self::parse_property( - "set!", + row, + &operator_name, &result.capture_names, &string_values, &p[1..], @@ -1336,6 +1393,7 @@ "is?" | "is-not?" => property_predicates.push(( Self::parse_property( + row, &operator_name, &result.capture_names, &string_values, @@ -1449,18 +1507,30 @@ unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } } + /// Check if a given step in a query is 'definite'. + /// + /// A query step is 'definite' if its parent pattern will be guaranteed to match + /// successfully once it reaches the step. + pub fn step_is_definite(&self, byte_offset: usize) -> bool { + unsafe { ffi::ts_query_step_is_definite(self.ptr.as_ptr(), byte_offset as u32) } + } + fn parse_property( + row: usize, function_name: &str, capture_names: &[String], string_values: &[String], args: &[ffi::TSQueryPredicateStep], ) -> Result<QueryProperty, QueryError> { if args.len() == 0 || args.len() > 3 { - return Err(QueryError::Predicate(format!( - "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", - function_name, - args.len(), - ))); + return Err(predicate_error( + row, + format!( + "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", + function_name, + args.len(), + ), + )); } let mut capture_id = None; @@ -1470,10 +1540,13 @@ for arg in args { if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { if capture_id.is_some() { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Unexpected second capture name @{}", - function_name, capture_names[arg.value_id as usize] - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. Unexpected second capture name @{}", + function_name, capture_names[arg.value_id as usize] + ), + )); } capture_id = Some(arg.value_id as usize); } else if key.is_none() { @@ -1481,20 +1554,26 @@ } else if value.is_none() { value = Some(string_values[arg.value_id as usize].as_str()); } else { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Unexpected third argument @{}", - function_name, string_values[arg.value_id as usize] - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. Unexpected third argument @{}", + function_name, string_values[arg.value_id as usize] + ), + )); } } if let Some(key) = key { Ok(QueryProperty::new(key, value, capture_id)) } else { - return Err(QueryError::Predicate(format!( - "Invalid arguments to {} predicate. Missing key argument", - function_name, - ))); + return Err(predicate_error( + row, + format!( + "Invalid arguments to {} predicate. 
Missing key argument", + function_name, + ), + )); } } } @@ -1581,11 +1660,15 @@ impl<'a> QueryMatch<'a> { cursor, id: m.id, pattern_index: m.pattern_index as usize, - captures: unsafe { - slice::from_raw_parts( - m.captures as *const QueryCapture<'a>, - m.capture_count as usize, - ) + captures: if m.capture_count > 0 { + unsafe { + slice::from_raw_parts( + m.captures as *const QueryCapture<'a>, + m.capture_count as usize, + ) + } + } else { + &[] }, } } @@ -1607,9 +1690,9 @@ impl<'a> QueryMatch<'a> { let node = self.capture_for_index(*i).unwrap(); (text_callback(node).as_ref() == s.as_bytes()) == *is_positive } - TextPredicate::CaptureMatchString(i, r) => { + TextPredicate::CaptureMatchString(i, r, is_positive) => { let node = self.capture_for_index(*i).unwrap(); - r.is_match(text_callback(node).as_ref()) + r.is_match(text_callback(node).as_ref()) == *is_positive } }) } @@ -1661,6 +1744,16 @@ impl<'a, T: AsRef<[u8]>> Iterator for QueryCaptures<'a, T> { } } +impl<'a> fmt::Debug for QueryMatch<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "QueryMatch {{ id: {}, pattern_index: {}, captures: {:?} }}", + self.id, self.pattern_index, self.captures + ) + } +} + impl PartialEq for Query { fn eq(&self, other: &Self) -> bool { self.ptr == other.ptr @@ -1744,9 +1837,66 @@ impl<'a> Into for &'a InputEdit { } } +impl<'a> LossyUtf8<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + LossyUtf8 { + bytes, + in_replacement: false, + } + } +} + +impl<'a> Iterator for LossyUtf8<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.bytes.is_empty() { + return None; + } + if self.in_replacement { + self.in_replacement = false; + return Some("\u{fffd}"); + } + match std::str::from_utf8(self.bytes) { + Ok(valid) => { + self.bytes = &[]; + Some(valid) + } + Err(error) => { + if let Some(error_len) = error.error_len() { + let error_start = error.valid_up_to(); + if error_start > 0 { + let result = + unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; + self.bytes = &self.bytes[(error_start + error_len)..]; + self.in_replacement = true; + Some(result) + } else { + self.bytes = &self.bytes[error_len..]; + Some("\u{fffd}") + } + } else { + None + } + } + } + } +} + +fn predicate_error(row: usize, message: String) -> QueryError { + QueryError { + kind: QueryErrorKind::Predicate, + row, + column: 0, + offset: 0, + message, + } +} + unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} unsafe impl Send for Tree {} +unsafe impl Send for QueryCursor {} unsafe impl Sync for Language {} unsafe impl Sync for Query {} diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index ba1b4cb6fa..c02d033620 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -7,7 +7,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/tag/0.14.7) and load them using a standalone script: +You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: ```html
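<!-- Sketch of the intended setup: once loaded, tree-sitter.js defines a
     global `TreeSitter` object, and the accompanying tree-sitter.wasm file
     is fetched asynchronously when `TreeSitter.init()` is called. -->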