Merge pull request #535 from Adylic/feature/thai-context-substitution…

…s-support feat: Added a thai context to the bidi and substitution features
opentypejs · Mar 7, 2023 · 814e528 · 814e528
2 parents d1deda1 + 4d0405b
commit 814e528
Show file tree

Hide file tree

Showing 9 changed files with 245 additions and 10 deletions.
diff --git a/src/bidi.js b/src/bidi.js
@@ -11,6 +11,10 @@ import arabicPresentationForms from './features/arab/arabicPresentationForms.js'
 import arabicRequiredLigatures from './features/arab/arabicRequiredLigatures.js';
 import latinWordCheck from './features/latn/contextCheck/latinWord.js';
 import latinLigature from './features/latn/latinLigatures.js';
+import thaiWordCheck from './features/thai/contextCheck/thaiWord.js';
+import thaiGlyphComposition from './features/thai/thaiGlyphComposition.js';
+import thaiLigatures from './features/thai/thaiLigatures.js';
+import thaiRequiredLigatures from './features/thai/thaiRequiredLigatures.js';
 import unicodeVariationSequenceCheck from './features/unicode/contextCheck/variationSequenceCheck.js';
 import unicodeVariationSequences from './features/unicode/variationSequences.js';
 
@@ -41,6 +45,7 @@ Bidi.prototype.contextChecks = ({
     latinWordCheck,
     arabicWordCheck,
     arabicSentenceCheck,
+    thaiWordCheck,
     unicodeVariationSequenceCheck
 });
 
@@ -62,6 +67,7 @@ function tokenizeText() {
     registerContextChecker.call(this, 'latinWord');
     registerContextChecker.call(this, 'arabicWord');
     registerContextChecker.call(this, 'arabicSentence');
+    registerContextChecker.call(this, 'thaiWord');
     registerContextChecker.call(this, 'unicodeVariationSequence');
     return this.tokenizer.tokenize(this.text);
 }
@@ -156,10 +162,7 @@ function applyArabicPresentationForms() {
  * Apply required arabic ligatures
  */
 function applyArabicRequireLigatures() {
-    const script = 'arab';
-    if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return;
-    const tags = this.featuresTags[script];
-    if (tags.indexOf('rlig') === -1) return;
+    if (!this.hasFeatureEnabled('arab', 'rlig')) return;
     checkGlyphIndexStatus.call(this);
     const ranges = this.tokenizer.getContextRanges('arabicWord');
     ranges.forEach(range => {
@@ -171,10 +174,7 @@ function applyArabicRequireLigatures() {
  * Apply latin ligatures
  */
 function applyLatinLigatures() {
-    const script = 'latn';
-    if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return;
-    const tags = this.featuresTags[script];
-    if (tags.indexOf('liga') === -1) return;
+    if (!this.hasFeatureEnabled('latn', 'liga')) return;
     checkGlyphIndexStatus.call(this);
     const ranges = this.tokenizer.getContextRanges('latinWord');
     ranges.forEach(range => {
@@ -189,6 +189,20 @@ function applyUnicodeVariationSequences() {
     });
 }
 
+/**
+ * Apply the enabled thai features to every thai word range
+ *
+ * Features are applied per range in a fixed order: ligatures (liga),
+ * required ligatures (rlig), then glyph composition (ccmp).
+ * As with the arabic and latin appliers, nothing is done (and the glyph
+ * index status is not checked) when none of these features is enabled.
+ */
+function applyThaiFeatures() {
+    // The enabled features do not change while the ranges are processed,
+    // so resolve them once instead of once per range.
+    const appliers = [
+        ['liga', thaiLigatures],
+        ['rlig', thaiRequiredLigatures],
+        ['ccmp', thaiGlyphComposition]
+    ].filter(
+        ([tag]) => this.hasFeatureEnabled('thai', tag)
+    ).map(
+        ([, applier]) => applier
+    );
+    if (appliers.length === 0) return;
+    checkGlyphIndexStatus.call(this);
+    const ranges = this.tokenizer.getContextRanges('thaiWord');
+    ranges.forEach(range => {
+        appliers.forEach(applier => applier.call(this, range));
+    });
+}
+
 /**
  * Check if a context is registered
  * @param {string} contextId context id
@@ -199,6 +213,9 @@ Bidi.prototype.checkContextReady = function (contextId) {
 
 /**
  * Apply features to registered contexts
+ *
+ * - The Glyph Composition (ccmp) feature should always be applied
+ * https://learn.microsoft.com/en-us/typography/opentype/spec/features_ae#tag-ccmp
  */
 Bidi.prototype.applyFeaturesToContexts = function () {
     if (this.checkContextReady('arabicWord')) {
@@ -211,11 +228,24 @@ Bidi.prototype.applyFeaturesToContexts = function () {
     if (this.checkContextReady('arabicSentence')) {
         reverseArabicSentences.call(this);
     }
+    if (this.checkContextReady('thaiWord')) {
+        applyThaiFeatures.call(this);
+    }
     if (this.checkContextReady('unicodeVariationSequence')) {
         applyUnicodeVariationSequences.call(this);
     }
 };
 
+/**
+ * Check whether a feature is enabled for a script
+ *
+ * Only scripts stored as own properties of `featuresTags` are considered,
+ * so inherited keys such as 'constructor' or 'toString' report `false`
+ * instead of failing on a value that has no `indexOf`.
+ * @param {string} script script tag (e.g. 'arab', 'latn', 'thai')
+ * @param {string} tag feature name (e.g. 'liga', 'rlig', 'ccmp')
+ * @returns {boolean} true when `tag` is listed for `script`
+ */
+Bidi.prototype.hasFeatureEnabled = function(script, tag) {
+    const registered = this.featuresTags;
+    // Own-property guard: a script name must not resolve through the
+    // prototype chain.
+    if (!Object.prototype.hasOwnProperty.call(registered, script)) return false;
+    return (registered[script] || []).indexOf(tag) !== -1;
+};
+
 /**
  * process text input
  * @param {string} text an input text

diff --git a/src/char.js b/src/char.js
@@ -27,6 +27,14 @@ export function isTashkeelArabicChar(char) {
     return /[\u0600-\u0605\u060C-\u060E\u0610-\u061B\u061E\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED]/.test(char);
 }
 
+/**
+ * Check if a char falls in the Thai Unicode block (U+0E00 to U+0E7F)
+ * @param {string} c a single char
+ * @returns {boolean} true when `c` contains a code unit in that range
+ */
+export function isThaiChar(c) {
+    const thaiBlock = /[\u0E00-\u0E7F]/;
+    return thaiBlock.test(c);
+}
+
 /**
  * Check if a char is Latin
  * @param {string} c a single char

diff --git a/src/features/thai/contextCheck/thaiWord.js b/src/features/thai/contextCheck/thaiWord.js
@@ -0,0 +1,30 @@
+import { isThaiChar } from '../../../char.js';
+
+/**
+ * Thai word start check
+ *
+ * A thai word starts on a thai char that is either the first char of the
+ * context (no previous char, i.e. `get(-1)` is null) or preceded by a
+ * non thai char.
+ * @param {ContextParams} contextParams context params positioned on the char to test
+ * @returns {boolean} true when the current char opens a thai word
+ */
+function thaiWordStartCheck(contextParams) {
+    const current = contextParams.current;
+    const previous = contextParams.get(-1);
+    // ? a non thai char never starts a thai word
+    if (!isThaiChar(current)) return false;
+    // ? thai first char, or thai char preceded with a non thai char
+    return previous === null || !isThaiChar(previous);
+}
+
+/**
+ * Thai word end check
+ *
+ * Only the next char is inspected: the word ends when there is no next
+ * char (`get(1)` is null) or when the next char is not thai.
+ * @param {ContextParams} contextParams context params positioned on the char to test
+ * @returns {boolean} true when the current char closes a thai word
+ */
+function thaiWordEndCheck(contextParams) {
+    const following = contextParams.get(1);
+    // ? last thai char
+    if (following === null) return true;
+    // ? next char is not thai
+    return !isThaiChar(following);
+}
+
+// Start and end checkers of the thai word context
+export default {
+    startCheck: thaiWordStartCheck,
+    endCheck: thaiWordEndCheck
+};
diff --git a/src/features/thai/thaiGlyphComposition.js b/src/features/thai/thaiGlyphComposition.js
@@ -0,0 +1,40 @@
+/**
+ * Apply Thai Glyph Composition feature to tokens
+ */
+
+import { ContextParams } from '../../tokenizer.js';
+import applySubstitution from '../applySubstitution.js';
+
+/**
+ * Build context params from the active state values of a list of tokens
+ * @param {any} tokens a list of tokens
+ * @param {number} index current item index (falls back to 0 when falsy)
+ * @returns {ContextParams} context params positioned on `index`
+ */
+function getContextParams(tokens, index) {
+    const activeValues = tokens.map(({ activeState }) => activeState.value);
+    const currentIndex = index || 0;
+    return new ContextParams(activeValues, currentIndex);
+}
+
+/**
+ * Apply Thai glyph composition (ccmp) substitutions
+ *
+ * Visits every position of the range, looks up the 'ccmp' substitutions
+ * for that position and applies them to the range tokens.
+ * Expects `this` to expose `tokenizer` and `query`.
+ * @param {ContextRange} range a range of tokens
+ */
+function thaiGlyphComposition(range) {
+    const rangeTokens = this.tokenizer.getRangeTokens(range);
+    let params = getContextParams(rangeTokens, 0);
+    // Positions come from the context captured before any substitution;
+    // `params` is rebuilt from the tokens after each applied substitution.
+    const initialContext = params.context;
+    initialContext.forEach((_value, position) => {
+        params.setCurrentIndex(position);
+        const actions = this.query.lookupFeature({
+            tag: 'ccmp',
+            script: 'thai',
+            contextParams: params
+        });
+        if (!actions.length) return;
+        actions.forEach(action => {
+            applySubstitution(action, rangeTokens, position);
+        });
+        params = getContextParams(rangeTokens, position);
+    });
+}
+
+export default thaiGlyphComposition;
diff --git a/src/features/thai/thaiLigatures.js b/src/features/thai/thaiLigatures.js
@@ -0,0 +1,40 @@
+/**
+ * Apply Thai Ligatures feature to tokens
+ */
+
+import { ContextParams } from '../../tokenizer.js';
+import applySubstitution from '../applySubstitution.js';
+
+/**
+ * Create context params over the active state values of the given tokens
+ * @param {any} tokens a list of tokens
+ * @param {number} index current item index, 0 is used when it is falsy
+ * @returns {ContextParams} context params whose current index is `index`
+ */
+function getContextParams(tokens, index) {
+    const values = tokens.map(item => item.activeState.value);
+    const position = index || 0;
+    return new ContextParams(values, position);
+}
+
+/**
+ * Apply Thai ligatures (liga) substitutions
+ *
+ * For each position of the range the 'liga' feature is looked up and any
+ * returned substitution is applied to the range tokens.
+ * Expects `this` to expose `tokenizer` and `query`.
+ * @param {ContextRange} range a range of tokens
+ */
+function thaiLigatures(range) {
+    const tag = 'liga';
+    const script = 'thai';
+    const wordTokens = this.tokenizer.getRangeTokens(range);
+    let current = getContextParams(wordTokens, 0);
+    // The loop runs over the context built before any substitution, while
+    // `current` is refreshed from the tokens once substitutions are applied.
+    current.context.forEach((_glyph, at) => {
+        current.setCurrentIndex(at);
+        const found = this.query.lookupFeature({
+            tag, script, contextParams: current
+        });
+        if (found.length) {
+            found.forEach(substitution => {
+                applySubstitution(substitution, wordTokens, at);
+            });
+            current = getContextParams(wordTokens, at);
+        }
+    });
+}
+
+export default thaiLigatures;
diff --git a/src/features/thai/thaiRequiredLigatures.js b/src/features/thai/thaiRequiredLigatures.js
@@ -0,0 +1,40 @@
+/**
+ * Apply Thai required ligatures feature to tokens
+ */
+
+import { ContextParams } from '../../tokenizer.js';
+import applySubstitution from '../applySubstitution.js';
+
+/**
+ * Wrap the active state values of a list of tokens into context params
+ * @param {any} tokens a list of tokens
+ * @param {number} index current item index (0 when omitted or falsy)
+ * @returns {ContextParams} context params set on `index`
+ */
+function getContextParams(tokens, index) {
+    const tokenValues = tokens.map(entry => entry.activeState.value);
+    return new ContextParams(tokenValues, index ? index : 0);
+}
+
+/**
+ * Apply Thai required ligatures (rlig) substitutions
+ *
+ * Every position of the range is queried for 'rlig' substitutions, which
+ * are applied to the range tokens as soon as they are found.
+ * Expects `this` to expose `tokenizer` and `query`.
+ * @param {ContextRange} range a range of tokens
+ */
+function thaiRequiredLigatures(range) {
+    const tokensInRange = this.tokenizer.getRangeTokens(range);
+    let lookupParams = getContextParams(tokensInRange, 0);
+    // Iteration follows the context as it was before substitutions;
+    // `lookupParams` is recreated from the tokens after each change.
+    const snapshot = lookupParams.context;
+    snapshot.forEach((_unused, cursor) => {
+        lookupParams.setCurrentIndex(cursor);
+        const query = { tag: 'rlig', script: 'thai', contextParams: lookupParams };
+        const matches = this.query.lookupFeature(query);
+        if (!matches.length) return;
+        matches.forEach(match => {
+            applySubstitution(match, tokensInRange, cursor);
+        });
+        lookupParams = getContextParams(tokensInRange, cursor);
+    });
+}
+
+export default thaiRequiredLigatures;
diff --git a/src/font.js b/src/font.js
@@ -299,7 +299,8 @@ Font.prototype.defaultRenderOptions = {
          * and shouldn't be turned off when rendering arabic text.
          */
         { script: 'arab', tags: ['init', 'medi', 'fina', 'rlig'] },
-        { script: 'latn', tags: ['liga', 'rlig'] }
+        { script: 'latn', tags: ['liga', 'rlig'] },
+        { script: 'thai', tags: ['liga', 'rlig', 'ccmp'] },
     ]
 };
 

diff --git a/test/bidi.js b/test/bidi.js
@@ -10,6 +10,7 @@ describe('bidi.js', function() {
     let bidiFira;
     let bidiScheherazade;
     let arabicTokenizer;
+
     before(function () {
         /**
          * arab
@@ -96,5 +97,50 @@ describe('bidi.js', function() {
             assert.deepEqual(font.stringToGlyphIndexes(string), [1, 1, 2, 1, 4, 3]);
         });
     });
-});
 
+    /**
+     * Thai support: liga / rlig / ccmp substitutions and thai word contexts
+     */
+    describe('thai scripts', () => {
+
+        // Fixtures shared by every test below, created once in before()
+        let thaiFont;
+        let bidiThai;
+
+        before(() => {
+            thaiFont = loadSync('./test/fonts/NotoSansThai-Medium-Testing-v1.ttf');
+            bidiThai = new Bidi();
+            // Resolve each token to the glyph index the test font gives its char
+            bidiThai.registerModifier(
+                'glyphIndex',
+                null,
+                token => thaiFont.charToGlyphIndex(token.char)
+            );
+            bidiThai.applyFeatures(thaiFont, [
+                { script: 'thai', tags: ['liga', 'rlig', 'ccmp'] }
+            ]);
+        });
+
+        describe('thai features', () => {
+            // Each input is expected to come out as a single glyph index
+            it('should apply glyph composition', () => {
+                const composed = bidiThai.getTextGlyphs('่ํ');
+                assert.deepEqual(composed, [451]);
+            });
+
+            it('should apply glyph ligatures', () => {
+                const ligated = bidiThai.getTextGlyphs('ฤๅ');
+                assert.deepEqual(ligated, [459]);
+            });
+
+            it('should apply glyph required ligatures', () => {
+                const required = bidiThai.getTextGlyphs('ลล');
+                assert.deepEqual(required, [352]);
+            });
+        });
+
+        describe('thai contexts', () => {
+            it('should match thai words in a given text', () => {
+                // NOTE(review): the tokenizer is driven directly here; presumably the
+                // 'thaiWord' checker was already registered by the getTextGlyphs
+                // calls of the tests above - confirm this test can run on its own
+                const { tokenizer } = bidiThai;
+                tokenizer.tokenize('The king said: เป็นคนใจดีสำหรับทุกคน because ความรักคือทุกสิ่ง');
+                const matched = tokenizer
+                    .getContextRanges('thaiWord')
+                    .map(range => tokenizer.rangeToText(range));
+                assert.deepEqual(matched, ['เป็นคนใจดีสำหรับทุกคน', 'ความรักคือทุกสิ่ง']);
+            });
+        });
+    });
+});
diff --git a/test/fonts/NotoSansThai-Medium-Testing-v1.ttf b/test/fonts/NotoSansThai-Medium-Testing-v1.ttf