diff --git a/src/bidi.js b/src/bidi.js index 89264825..e393af13 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -11,6 +11,10 @@ import arabicPresentationForms from './features/arab/arabicPresentationForms.js' import arabicRequiredLigatures from './features/arab/arabicRequiredLigatures.js'; import latinWordCheck from './features/latn/contextCheck/latinWord.js'; import latinLigature from './features/latn/latinLigatures.js'; +import thaiWordCheck from './features/thai/contextCheck/thaiWord.js'; +import thaiGlyphComposition from './features/thai/thaiGlyphComposition.js'; +import thaiLigatures from './features/thai/thaiLigatures.js'; +import thaiRequiredLigatures from './features/thai/thaiRequiredLigatures.js'; import unicodeVariationSequenceCheck from './features/unicode/contextCheck/variationSequenceCheck.js'; import unicodeVariationSequences from './features/unicode/variationSequences.js'; @@ -41,6 +45,7 @@ Bidi.prototype.contextChecks = ({ latinWordCheck, arabicWordCheck, arabicSentenceCheck, + thaiWordCheck, unicodeVariationSequenceCheck }); @@ -62,6 +67,7 @@ function tokenizeText() { registerContextChecker.call(this, 'latinWord'); registerContextChecker.call(this, 'arabicWord'); registerContextChecker.call(this, 'arabicSentence'); + registerContextChecker.call(this, 'thaiWord'); registerContextChecker.call(this, 'unicodeVariationSequence'); return this.tokenizer.tokenize(this.text); } @@ -156,10 +162,7 @@ function applyArabicPresentationForms() { * Apply required arabic ligatures */ function applyArabicRequireLigatures() { - const script = 'arab'; - if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return; - const tags = this.featuresTags[script]; - if (tags.indexOf('rlig') === -1) return; + if (!this.hasFeatureEnabled('arab', 'rlig')) return; checkGlyphIndexStatus.call(this); const ranges = this.tokenizer.getContextRanges('arabicWord'); ranges.forEach(range => { @@ -171,10 +174,7 @@ function applyArabicRequireLigatures() { * Apply required 
arabic ligatures */ function applyLatinLigatures() { - const script = 'latn'; - if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return; - const tags = this.featuresTags[script]; - if (tags.indexOf('liga') === -1) return; + if (!this.hasFeatureEnabled('latn', 'liga')) return; checkGlyphIndexStatus.call(this); const ranges = this.tokenizer.getContextRanges('latinWord'); ranges.forEach(range => { @@ -189,6 +189,20 @@ function applyUnicodeVariationSequences() { }); } +/** + * Apply the enabled thai features (liga, rlig, ccmp) to every thaiWord range + */ +function applyThaiFeatures() { + checkGlyphIndexStatus.call(this); + const ranges = this.tokenizer.getContextRanges('thaiWord'); + ranges.forEach(range => { + if (this.hasFeatureEnabled('thai', 'liga')) thaiLigatures.call(this, range); + if (this.hasFeatureEnabled('thai', 'rlig')) thaiRequiredLigatures.call(this, range); + if (this.hasFeatureEnabled('thai', 'ccmp')) thaiGlyphComposition.call(this, range); + }); + +} + /** * Check if a context is registered * @param {string} contextId context id @@ -199,6 +213,9 @@ Bidi.prototype.checkContextReady = function (contextId) { /** * Apply features to registered contexts + * + * - A Glyph Composition (ccmp) feature should always be applied + * https://learn.microsoft.com/en-us/typography/opentype/spec/features_ae#tag-ccmp */ Bidi.prototype.applyFeaturesToContexts = function () { if (this.checkContextReady('arabicWord')) { @@ -211,11 +228,24 @@ Bidi.prototype.applyFeaturesToContexts = function () { if (this.checkContextReady('arabicSentence')) { reverseArabicSentences.call(this); } + if (this.checkContextReady('thaiWord')) { + applyThaiFeatures.call(this); + } if (this.checkContextReady('unicodeVariationSequence')) { applyUnicodeVariationSequences.call(this); } }; +/** + * Check whether a feature is enabled for a script + * @param {string} script + * @param {string} tag feature name + * @returns {boolean} + */ +Bidi.prototype.hasFeatureEnabled = function(script, tag) { + return 
(this.featuresTags[script] || []).indexOf(tag) !== -1; +}; + /** * process text input * @param {string} text an input text diff --git a/src/char.js b/src/char.js index 29ecea93..b192f210 100644 --- a/src/char.js +++ b/src/char.js @@ -27,6 +27,14 @@ export function isTashkeelArabicChar(char) { return /[\u0600-\u0605\u060C-\u060E\u0610-\u061B\u061E\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED]/.test(char); } +/** + * Check if a char is Thai + * @param {string} c a single char + */ +export function isThaiChar(c) { + return /[\u0E00-\u0E7F]/.test(c); +} + /** * Check if a char is Latin * @param {string} c a single char diff --git a/src/features/thai/contextCheck/thaiWord.js b/src/features/thai/contextCheck/thaiWord.js new file mode 100644 index 00000000..5dc4810b --- /dev/null +++ b/src/features/thai/contextCheck/thaiWord.js @@ -0,0 +1,30 @@ +import { isThaiChar } from '../../../char.js'; + +/** + * Thai word context checkers + */ +function thaiWordStartCheck(contextParams) { + const char = contextParams.current; + const prevChar = contextParams.get(-1); + return ( + // ? thai first char + (prevChar === null && isThaiChar(char)) || + // ? thai char preceded with a non thai char + (!isThaiChar(prevChar) && isThaiChar(char)) + ); +} + +function thaiWordEndCheck(contextParams) { + const nextChar = contextParams.get(1); + return ( + // ? last thai char + (nextChar === null) || + // ? 
next char is not thai + (!isThaiChar(nextChar)) + ); +} + +export default { + startCheck: thaiWordStartCheck, + endCheck: thaiWordEndCheck +}; diff --git a/src/features/thai/thaiGlyphComposition.js b/src/features/thai/thaiGlyphComposition.js new file mode 100644 index 00000000..4eff31f5 --- /dev/null +++ b/src/features/thai/thaiGlyphComposition.js @@ -0,0 +1,40 @@ +/** + * Apply Thai Glyph Composition feature to tokens + */ + +import { ContextParams } from '../../tokenizer.js'; +import applySubstitution from '../applySubstitution.js'; + +/** + * Update context params + * @param {any} tokens a list of tokens + * @param {number} index current item index + */ +function getContextParams(tokens, index) { + const context = tokens.map(token => token.activeState.value); + return new ContextParams(context, index || 0); +} + +/** + * Apply Thai required glyphs composition substitutions + * @param {ContextRange} range a range of tokens + */ +function thaiGlyphComposition(range) { + const script = 'thai'; + let tokens = this.tokenizer.getRangeTokens(range); + let contextParams = getContextParams(tokens, 0); + contextParams.context.forEach((glyphIndex, index) => { + contextParams.setCurrentIndex(index); + let substitutions = this.query.lookupFeature({ + tag: 'ccmp', script, contextParams + }); + if (substitutions.length) { + substitutions.forEach( + action => applySubstitution(action, tokens, index) + ); + contextParams = getContextParams(tokens, index); + } + }); +} + +export default thaiGlyphComposition; diff --git a/src/features/thai/thaiLigatures.js b/src/features/thai/thaiLigatures.js new file mode 100644 index 00000000..a192d838 --- /dev/null +++ b/src/features/thai/thaiLigatures.js @@ -0,0 +1,40 @@ +/** + * Apply Thai Ligatures feature to tokens + */ + +import { ContextParams } from '../../tokenizer.js'; +import applySubstitution from '../applySubstitution.js'; + +/** + * Update context params + * @param {any} tokens a list of tokens + * @param {number} index current 
item index + */ +function getContextParams(tokens, index) { + const context = tokens.map(token => token.activeState.value); + return new ContextParams(context, index || 0); +} + +/** + * Apply Thai ligature (liga) substitutions + * @param {ContextRange} range a range of tokens + */ +function thaiLigatures(range) { + const script = 'thai'; + let tokens = this.tokenizer.getRangeTokens(range); + let contextParams = getContextParams(tokens, 0); + contextParams.context.forEach((glyphIndex, index) => { + contextParams.setCurrentIndex(index); + let substitutions = this.query.lookupFeature({ + tag: 'liga', script, contextParams + }); + if (substitutions.length) { + substitutions.forEach( + action => applySubstitution(action, tokens, index) + ); + contextParams = getContextParams(tokens, index); + } + }); +} + +export default thaiLigatures; diff --git a/src/features/thai/thaiRequiredLigatures.js b/src/features/thai/thaiRequiredLigatures.js new file mode 100644 index 00000000..5dc1a7fb --- /dev/null +++ b/src/features/thai/thaiRequiredLigatures.js @@ -0,0 +1,40 @@ +/** + * Apply Thai required ligatures feature to tokens + */ + +import { ContextParams } from '../../tokenizer.js'; +import applySubstitution from '../applySubstitution.js'; + +/** + * Update context params + * @param {any} tokens a list of tokens + * @param {number} index current item index + */ +function getContextParams(tokens, index) { + const context = tokens.map(token => token.activeState.value); + return new ContextParams(context, index || 0); +} + +/** + * Apply Thai required ligature (rlig) substitutions + * @param {ContextRange} range a range of tokens + */ +function thaiRequiredLigatures(range) { + const script = 'thai'; + let tokens = this.tokenizer.getRangeTokens(range); + let contextParams = getContextParams(tokens, 0); + contextParams.context.forEach((glyphIndex, index) => { + contextParams.setCurrentIndex(index); + let substitutions = this.query.lookupFeature({ + tag: 'rlig', script, 
contextParams + }); + if (substitutions.length) { + substitutions.forEach( + action => applySubstitution(action, tokens, index) + ); + contextParams = getContextParams(tokens, index); + } + }); +} + +export default thaiRequiredLigatures; diff --git a/src/font.js b/src/font.js index b357bba7..6e02c189 100644 --- a/src/font.js +++ b/src/font.js @@ -299,7 +299,8 @@ Font.prototype.defaultRenderOptions = { * and shouldn't be turned off when rendering arabic text. */ { script: 'arab', tags: ['init', 'medi', 'fina', 'rlig'] }, - { script: 'latn', tags: ['liga', 'rlig'] } + { script: 'latn', tags: ['liga', 'rlig'] }, + { script: 'thai', tags: ['liga', 'rlig', 'ccmp'] }, ] }; diff --git a/test/bidi.js b/test/bidi.js index aecadf95..2f2b8601 100644 --- a/test/bidi.js +++ b/test/bidi.js @@ -10,6 +10,7 @@ describe('bidi.js', function() { let bidiFira; let bidiScheherazade; let arabicTokenizer; + before(function () { /** * arab @@ -96,5 +97,50 @@ describe('bidi.js', function() { assert.deepEqual(font.stringToGlyphIndexes(string), [1, 1, 2, 1, 4, 3]); }); }); -}); + describe('thai scripts', () => { + + let thaiFont; + let bidiThai; + + before(()=> { + thaiFont = loadSync('./test/fonts/NotoSansThai-Medium-Testing-v1.ttf'); + bidiThai = new Bidi(); + bidiThai.registerModifier( + 'glyphIndex', null, token => thaiFont.charToGlyphIndex(token.char) + ); + const requiredThaiFeatures = [{ + script: 'thai', + tags: ['liga', 'rlig', 'ccmp'] + }]; + bidiThai.applyFeatures(thaiFont, requiredThaiFeatures); + }); + + describe('thai features', () => { + it('should apply glyph composition', () => { + let glyphIndexes = bidiThai.getTextGlyphs('่ํ'); + assert.deepEqual(glyphIndexes, [451]); + }); + + it('should apply glyph ligatures', () => { + let glyphIndexes = bidiThai.getTextGlyphs('ฤๅ'); + assert.deepEqual(glyphIndexes, [459]); + }); + + it('should apply glyph required ligatures', () => { + let glyphIndexes = bidiThai.getTextGlyphs('ลล'); + assert.deepEqual(glyphIndexes, [352]); + }); + }); 
+ + describe('thai contexts', () => { + it('should match thai words in a given text', () => { + const tokenizer = bidiThai.tokenizer; + tokenizer.tokenize('The king said: เป็นคนใจดีสำหรับทุกคน because ความรักคือทุกสิ่ง'); + const ranges = tokenizer.getContextRanges('thaiWord'); + const words = ranges.map(range => tokenizer.rangeToText(range)); + assert.deepEqual(words, ['เป็นคนใจดีสำหรับทุกคน', 'ความรักคือทุกสิ่ง']); + }); + }); + }); +}); diff --git a/test/fonts/NotoSansThai-Medium-Testing-v1.ttf b/test/fonts/NotoSansThai-Medium-Testing-v1.ttf new file mode 100644 index 00000000..b8629e2d Binary files /dev/null and b/test/fonts/NotoSansThai-Medium-Testing-v1.ttf differ