
Commit c0765e3: "update 0.10"
uben0 committed Dec 23, 2023
1 parent 39c5976
Showing 9 changed files with 5,772 additions and 5,892 deletions.
12 changes: 10 additions & 2 deletions DOC.md
@@ -10,6 +10,9 @@ List of tests exposing particular behaviors from Typst

## FIXME

- [ ] Test `E11`: In Helix, causes a segfault
- [ ] Test `E12`: Matching square brackets in text are paired
- [ ] Test `E13`: Point ending a ref is not part of the ref
- [X] ~Test `E10`: Math shorthand and letter can be applied~
- [X] ~Test `E09`: Indentation and comments~
- [X] ~Test `E01`: Group termination in math~
@@ -69,14 +72,19 @@ Which have the same hierarchy as:

## Character class

-Four character classes are defined in the external scanner:
+Five character classes are defined in the external scanner:

- space (`is_sp`)
- line break (`is_lb`)
- xid start (`is_id_start`)
- xid continue (`is_id_continue`)
- word part (`is_word_part`)

-The two function `is_id_start` and `is_id_continue` are implemented as binary search. The character list is based on the Unicode database which can be found [here](unicode.txt).
+The three functions `is_id_start`, `is_id_continue`, and `is_word_part` are implemented as a binary search.

The character list is based on the Unicode database, which can be found here: https://www.unicode.org/Public/UCD/latest/ucd/

A utility is used to produce those tables: https://github.com/uben0/unicode-table

## Barrier

9 changes: 4 additions & 5 deletions README.md
@@ -20,23 +20,22 @@ The documentation of this implementation is available in [DOC](DOC.md).

## TODO

-- [ ] More tests, objective 1000, current 372
+- [ ] More tests, objective 1000, current 397
- [ ] Documentation
- [ ] Installation
- [X] Helix
- [X] Emacs
- [ ] NeoVim (work in progress)
- [X] Implementation
-- [X] Update
-- [X] 0.9
-- [X] 0.8
+- [X] Update 0.10
- [X] CJK script
- [X] Fixme
- [X] Optimization
- [X] Parser size
- [X] Math ident
- [X] Extras
- [ ] Features
-- [ ] Foldable sections
+- [ ] Foldable sections (experimentation ongoing)

## DONE

24 changes: 23 additions & 1 deletion corpus/fixme.scm
@@ -1,5 +1,5 @@
=====================
-Fixme 000
+Fixme E11
=====================
```typst
_
@@ -12,3 +12,25 @@ _
(raw_blck
lang: (ident)
(blob)))


=====================
Fixme E12
=====================
#[[]]
--------------------

(source_file
(content
(text)))


=====================
Fixme E13
=====================
@hello.
--------------------

(source_file
(ref)
(text))
20 changes: 17 additions & 3 deletions corpus/test.scm
@@ -2809,6 +2809,8 @@ Hello"World"
---------------------

(source_file
(text)
(quote)
(text)
(quote))

@@ -5864,9 +5866,21 @@ Test 394
=====================
Test 395
=====================
-@hello.
+ぁ_ぁ_
--------------------

(source_file
-(ref)
-(text))
+(text)
+(emph
+(text)))


=====================
Test 396
=====================
_e_e_
--------------------

(source_file
(emph
(text)))
8 changes: 5 additions & 3 deletions grammar.js
@@ -69,6 +69,7 @@ module.exports = grammar({
$._token_raw_lang,
$._token_identifier,
$._token_label,
$._token_anti_markup,

$.comment,
$._sp,
@@ -109,8 +110,6 @@ module.exports = grammar({
// a line break in a content context
_content_lb: $ => seq(optional($._redent), choice($.parbreak, $._lb)),

-// this token matches `_`, `*` and `"` when they are between alphanumeric
-// characters because, in that case, they do not count as markup
-_anti_markup: $ => token(seq(ALPHANUM, /[_*"]/, ALPHANUM)),

linebreak: $ => /\\/,
@@ -133,7 +132,7 @@
),

text: $ => prec.right(repeat1(choice(
-$._anti_markup,
+$._token_anti_markup,
$.escape,
/./,
))),
@@ -508,6 +507,7 @@ module.exports = grammar({
'align',
'alignement',
'aqua',
'arguments',
'array',
'assert',
'bibliography',
@@ -547,6 +547,7 @@
'fraction',
'fuchsia',
'function',
'gradient',
'gray',
'green',
'grid',
@@ -582,6 +583,7 @@
'navy',
'numbering',
'oklab',
'oklch',
'olive',
'orange',
'outline',
18 changes: 17 additions & 1 deletion src/grammar.json
@@ -283,7 +283,7 @@
"members": [
{
"type": "SYMBOL",
-"name": "_anti_markup"
+"name": "_token_anti_markup"
},
{
"type": "SYMBOL",
@@ -3369,6 +3369,10 @@
"type": "STRING",
"value": "aqua"
},
{
"type": "STRING",
"value": "arguments"
},
{
"type": "STRING",
"value": "array"
@@ -3525,6 +3529,10 @@
"type": "STRING",
"value": "function"
},
{
"type": "STRING",
"value": "gradient"
},
{
"type": "STRING",
"value": "gray"
@@ -3665,6 +3673,10 @@
"type": "STRING",
"value": "oklab"
},
{
"type": "STRING",
"value": "oklch"
},
{
"type": "STRING",
"value": "olive"
@@ -4078,6 +4090,10 @@
"type": "SYMBOL",
"name": "_token_label"
},
{
"type": "SYMBOL",
"name": "_token_anti_markup"
},
{
"type": "SYMBOL",
"name": "comment"
