Skip to content

Commit

Permalink
Merge pull request #535 from Adylic/feature/thai-context-substitution…
Browse files Browse the repository at this point in the history
…s-support

feat: Added a thai context to the bidi and substitution features
  • Loading branch information
Connum authored Mar 7, 2023
2 parents d1deda1 + 4d0405b commit 814e528
Show file tree
Hide file tree
Showing 9 changed files with 245 additions and 10 deletions.
46 changes: 38 additions & 8 deletions src/bidi.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import arabicPresentationForms from './features/arab/arabicPresentationForms.js'
import arabicRequiredLigatures from './features/arab/arabicRequiredLigatures.js';
import latinWordCheck from './features/latn/contextCheck/latinWord.js';
import latinLigature from './features/latn/latinLigatures.js';
import thaiWordCheck from './features/thai/contextCheck/thaiWord.js';
import thaiGlyphComposition from './features/thai/thaiGlyphComposition.js';
import thaiLigatures from './features/thai/thaiLigatures.js';
import thaiRequiredLigatures from './features/thai/thaiRequiredLigatures.js';
import unicodeVariationSequenceCheck from './features/unicode/contextCheck/variationSequenceCheck.js';
import unicodeVariationSequences from './features/unicode/variationSequences.js';

Expand Down Expand Up @@ -41,6 +45,7 @@ Bidi.prototype.contextChecks = ({
latinWordCheck,
arabicWordCheck,
arabicSentenceCheck,
thaiWordCheck,
unicodeVariationSequenceCheck
});

Expand All @@ -62,6 +67,7 @@ function tokenizeText() {
registerContextChecker.call(this, 'latinWord');
registerContextChecker.call(this, 'arabicWord');
registerContextChecker.call(this, 'arabicSentence');
registerContextChecker.call(this, 'thaiWord');
registerContextChecker.call(this, 'unicodeVariationSequence');
return this.tokenizer.tokenize(this.text);
}
Expand Down Expand Up @@ -156,10 +162,7 @@ function applyArabicPresentationForms() {
* Apply required arabic ligatures
*/
function applyArabicRequireLigatures() {
const script = 'arab';
if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return;
const tags = this.featuresTags[script];
if (tags.indexOf('rlig') === -1) return;
if (!this.hasFeatureEnabled('arab', 'rlig')) return;
checkGlyphIndexStatus.call(this);
const ranges = this.tokenizer.getContextRanges('arabicWord');
ranges.forEach(range => {
Expand All @@ -171,10 +174,7 @@ function applyArabicRequireLigatures() {
* Apply latin ligatures
*/
function applyLatinLigatures() {
const script = 'latn';
if (!Object.prototype.hasOwnProperty.call(this.featuresTags, script)) return;
const tags = this.featuresTags[script];
if (tags.indexOf('liga') === -1) return;
if (!this.hasFeatureEnabled('latn', 'liga')) return;
checkGlyphIndexStatus.call(this);
const ranges = this.tokenizer.getContextRanges('latinWord');
ranges.forEach(range => {
Expand All @@ -189,6 +189,20 @@ function applyUnicodeVariationSequences() {
});
}

/**
 * Run every enabled Thai substitution feature over each 'thaiWord'
 * context range, in the fixed order liga, rlig, ccmp.
 *
 * Must be invoked with a Bidi instance as `this`. Whether a feature is
 * enabled is re-evaluated for every range via `this.hasFeatureEnabled`.
 */
function applyThaiFeatures() {
    checkGlyphIndexStatus.call(this);
    // Order matters: handlers run in this sequence for each range.
    const thaiFeatureHandlers = [
        ['liga', thaiLigatures],
        ['rlig', thaiRequiredLigatures],
        ['ccmp', thaiGlyphComposition],
    ];
    const thaiWordRanges = this.tokenizer.getContextRanges('thaiWord');
    thaiWordRanges.forEach(range => {
        for (const [tag, handler] of thaiFeatureHandlers) {
            if (this.hasFeatureEnabled('thai', tag)) {
                handler.call(this, range);
            }
        }
    });
}

/**
* Check if a context is registered
* @param {string} contextId context id
Expand All @@ -199,6 +213,9 @@ Bidi.prototype.checkContextReady = function (contextId) {

/**
* Apply features to registered contexts
*
* - A Glyph Composition (ccmp) feature should be always applied
* https://learn.microsoft.com/en-us/typography/opentype/spec/features_ae#tag-ccmp
*/
Bidi.prototype.applyFeaturesToContexts = function () {
if (this.checkContextReady('arabicWord')) {
Expand All @@ -211,11 +228,24 @@ Bidi.prototype.applyFeaturesToContexts = function () {
if (this.checkContextReady('arabicSentence')) {
reverseArabicSentences.call(this);
}
if (this.checkContextReady('thaiWord')) {
applyThaiFeatures.call(this);
}
if (this.checkContextReady('unicodeVariationSequence')) {
applyUnicodeVariationSequences.call(this);
}
};

/**
 * Check whether a feature is enabled for a script
 *
 * Only scripts registered as own properties of `this.featuresTags` are
 * considered, so names inherited from Object.prototype (e.g.
 * 'constructor', 'toString') report `false` instead of throwing.
 * @param {string} script script tag, e.g. 'arab', 'latn', 'thai'
 * @param {string} tag feature name, e.g. 'liga', 'rlig', 'ccmp'
 * @returns {boolean} true when `tag` is listed for `script`
 */
Bidi.prototype.hasFeatureEnabled = function(script, tag) {
    const featuresTags = this.featuresTags;
    // Guard against inherited members: featuresTags['constructor'] is a
    // function, which has no indexOf and would raise a TypeError.
    if (!Object.prototype.hasOwnProperty.call(featuresTags, script)) return false;
    return (featuresTags[script] || []).indexOf(tag) !== -1;
};

/**
* process text input
* @param {string} text an input text
Expand Down
8 changes: 8 additions & 0 deletions src/char.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ export function isTashkeelArabicChar(char) {
return /[\u0600-\u0605\u060C-\u060E\u0610-\u061B\u061E\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED]/.test(char);
}

/**
 * Check if a char belongs to the Thai Unicode block (U+0E00 - U+0E7F)
 * @param {string} c a single char
 * @returns {boolean} true when `c` contains a code unit in that range
 */
export function isThaiChar(c) {
    const thaiBlockPattern = /[\u0E00-\u0E7F]/;
    return thaiBlockPattern.test(c);
}

/**
* Check if a char is Latin
* @param {string} c a single char
Expand Down
30 changes: 30 additions & 0 deletions src/features/thai/contextCheck/thaiWord.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { isThaiChar } from '../../../char.js';

/**
 * Thai word context checkers
 */

/**
 * Decide whether the current char opens a Thai word: it must be Thai and
 * either have no previous char or follow a non-Thai char.
 * @param {ContextParams} contextParams context params positioned on the char to test
 * @returns {boolean}
 */
function thaiWordStartCheck(contextParams) {
    const { current } = contextParams;
    const previous = contextParams.get(-1);
    // A non-Thai char can never start a Thai word.
    if (!isThaiChar(current)) return false;
    // First char of the text, or the preceding char is not Thai.
    return previous === null || !isThaiChar(previous);
}

/**
 * Decide whether the current position closes a Thai word: true when there
 * is no next char or the next char is not Thai. Only the following char
 * is inspected.
 * @param {ContextParams} contextParams context params positioned on the char to test
 * @returns {boolean}
 */
function thaiWordEndCheck(contextParams) {
    const following = contextParams.get(1);
    // End of text always terminates the word.
    if (following === null) return true;
    return !isThaiChar(following);
}

// Start/end predicates of the Thai word context, exposed as the default export.
const thaiWordCheck = {
    startCheck: thaiWordStartCheck,
    endCheck: thaiWordEndCheck,
};

export default thaiWordCheck;
40 changes: 40 additions & 0 deletions src/features/thai/thaiGlyphComposition.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Apply Thai Glyph Composition feature to tokens
*/

import { ContextParams } from '../../tokenizer.js';
import applySubstitution from '../applySubstitution.js';

/**
 * Build fresh context params from the active state value of each token
 * @param {any} tokens a list of tokens
 * @param {number} index current item index (falsy values fall back to 0)
 * @returns {ContextParams}
 */
function getContextParams(tokens, index) {
    const startIndex = index ? index : 0;
    const activeValues = tokens.map(({ activeState }) => activeState.value);
    return new ContextParams(activeValues, startIndex);
}

/**
 * Apply Thai glyph composition (ccmp) substitutions to a range of tokens.
 *
 * Must be invoked with `this` providing `tokenizer` and `query`. Each
 * position of the range is looked up in turn; when substitutions are
 * returned they are applied to the tokens and the context params are
 * rebuilt from the updated token states.
 * @param {ContextRange} range a range of tokens
 */
function thaiGlyphComposition(range) {
    const rangeTokens = this.tokenizer.getRangeTokens(range);
    let contextParams = getContextParams(rangeTokens, 0);
    // Walk the positions of the context captured before any substitution.
    const initialContext = contextParams.context;
    const glyphCount = initialContext.length;
    for (let position = 0; position < glyphCount; position++) {
        contextParams.setCurrentIndex(position);
        const actions = this.query.lookupFeature({
            tag: 'ccmp',
            script: 'thai',
            contextParams,
        });
        if (!actions.length) continue;
        actions.forEach(action => applySubstitution(action, rangeTokens, position));
        // Token states changed: refresh the context for the next lookups.
        contextParams = getContextParams(rangeTokens, position);
    }
}

export default thaiGlyphComposition;
40 changes: 40 additions & 0 deletions src/features/thai/thaiLigatures.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Apply Thai Ligatures feature to tokens
*/

import { ContextParams } from '../../tokenizer.js';
import applySubstitution from '../applySubstitution.js';

/**
 * Build fresh context params from the active state value of each token
 * @param {any} tokens a list of tokens
 * @param {number} index current item index (falsy values fall back to 0)
 * @returns {ContextParams}
 */
function getContextParams(tokens, index) {
    const startIndex = index ? index : 0;
    const activeValues = tokens.map(({ activeState }) => activeState.value);
    return new ContextParams(activeValues, startIndex);
}

/**
 * Apply Thai ligature (liga) substitutions to a range of tokens.
 *
 * Must be invoked with `this` providing `tokenizer` and `query`. Each
 * position of the range is looked up in turn; when substitutions are
 * returned they are applied to the tokens and the context params are
 * rebuilt from the updated token states.
 * @param {ContextRange} range a range of tokens
 */
function thaiLigatures(range) {
    const rangeTokens = this.tokenizer.getRangeTokens(range);
    let contextParams = getContextParams(rangeTokens, 0);
    // Walk the positions of the context captured before any substitution.
    const initialContext = contextParams.context;
    const glyphCount = initialContext.length;
    for (let position = 0; position < glyphCount; position++) {
        contextParams.setCurrentIndex(position);
        const actions = this.query.lookupFeature({
            tag: 'liga',
            script: 'thai',
            contextParams,
        });
        if (!actions.length) continue;
        actions.forEach(action => applySubstitution(action, rangeTokens, position));
        // Token states changed: refresh the context for the next lookups.
        contextParams = getContextParams(rangeTokens, position);
    }
}

export default thaiLigatures;
40 changes: 40 additions & 0 deletions src/features/thai/thaiRequiredLigatures.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Apply Thai required ligatures feature to tokens
*/

import { ContextParams } from '../../tokenizer.js';
import applySubstitution from '../applySubstitution.js';

/**
 * Build fresh context params from the active state value of each token
 * @param {any} tokens a list of tokens
 * @param {number} index current item index (falsy values fall back to 0)
 * @returns {ContextParams}
 */
function getContextParams(tokens, index) {
    const startIndex = index ? index : 0;
    const activeValues = tokens.map(({ activeState }) => activeState.value);
    return new ContextParams(activeValues, startIndex);
}

/**
 * Apply Thai required ligature (rlig) substitutions to a range of tokens.
 *
 * Must be invoked with `this` providing `tokenizer` and `query`. Each
 * position of the range is looked up in turn; when substitutions are
 * returned they are applied to the tokens and the context params are
 * rebuilt from the updated token states.
 * @param {ContextRange} range a range of tokens
 */
function thaiRequiredLigatures(range) {
    const rangeTokens = this.tokenizer.getRangeTokens(range);
    let contextParams = getContextParams(rangeTokens, 0);
    // Walk the positions of the context captured before any substitution.
    const initialContext = contextParams.context;
    const glyphCount = initialContext.length;
    for (let position = 0; position < glyphCount; position++) {
        contextParams.setCurrentIndex(position);
        const actions = this.query.lookupFeature({
            tag: 'rlig',
            script: 'thai',
            contextParams,
        });
        if (!actions.length) continue;
        actions.forEach(action => applySubstitution(action, rangeTokens, position));
        // Token states changed: refresh the context for the next lookups.
        contextParams = getContextParams(rangeTokens, position);
    }
}

export default thaiRequiredLigatures;
3 changes: 2 additions & 1 deletion src/font.js
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,8 @@ Font.prototype.defaultRenderOptions = {
* and shouldn't be turned off when rendering arabic text.
*/
{ script: 'arab', tags: ['init', 'medi', 'fina', 'rlig'] },
{ script: 'latn', tags: ['liga', 'rlig'] }
{ script: 'latn', tags: ['liga', 'rlig'] },
{ script: 'thai', tags: ['liga', 'rlig', 'ccmp'] },
]
};

Expand Down
48 changes: 47 additions & 1 deletion test/bidi.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ describe('bidi.js', function() {
let bidiFira;
let bidiScheherazade;
let arabicTokenizer;

before(function () {
/**
* arab
Expand Down Expand Up @@ -96,5 +97,50 @@ describe('bidi.js', function() {
assert.deepEqual(font.stringToGlyphIndexes(string), [1, 1, 2, 1, 4, 3]);
});
});
});

// Thai suite: checks feature substitution (ccmp, liga, rlig) and the
// 'thaiWord' context detection using a dedicated test font.
describe('thai scripts', () => {

let thaiFont;
let bidiThai;

// Shared setup: load the Thai test font, register a 'glyphIndex' modifier
// that maps each token's char through thaiFont.charToGlyphIndex, and apply
// the liga, rlig and ccmp features for the 'thai' script.
before(()=> {
thaiFont = loadSync('./test/fonts/NotoSansThai-Medium-Testing-v1.ttf');
bidiThai = new Bidi();
bidiThai.registerModifier(
'glyphIndex', null, token => thaiFont.charToGlyphIndex(token.char)
);
const requiredThaiFeatures = [{
script: 'thai',
tags: ['liga', 'rlig', 'ccmp']
}];
bidiThai.applyFeatures(thaiFont, requiredThaiFeatures);
});

// Each case feeds a two-character Thai string and expects a single glyph
// index back. NOTE(review): the expected indexes (451, 459, 352) are
// presumably specific to the test font loaded above.
describe('thai features', () => {
it('should apply glyph composition', () => {
let glyphIndexes = bidiThai.getTextGlyphs('่ํ');
assert.deepEqual(glyphIndexes, [451]);
});

it('should apply glyph ligatures', () => {
let glyphIndexes = bidiThai.getTextGlyphs('ฤๅ');
assert.deepEqual(glyphIndexes, [459]);
});

it('should apply glyph required ligatures', () => {
let glyphIndexes = bidiThai.getTextGlyphs('ลล');
assert.deepEqual(glyphIndexes, [352]);
});
});

// Tokenizes a mixed Latin/Thai sentence directly on the Bidi instance's
// tokenizer and expects exactly the two Thai runs as 'thaiWord' ranges.
describe('thai contexts', () => {
it('should match thai words in a given text', () => {
const tokenizer = bidiThai.tokenizer;
tokenizer.tokenize('The king said: เป็นคนใจดีสำหรับทุกคน because ความรักคือทุกสิ่ง');
const ranges = tokenizer.getContextRanges('thaiWord');
const words = ranges.map(range => tokenizer.rangeToText(range));
assert.deepEqual(words, ['เป็นคนใจดีสำหรับทุกคน', 'ความรักคือทุกสิ่ง']);
});
});
});
});
Binary file added test/fonts/NotoSansThai-Medium-Testing-v1.ttf
Binary file not shown.

0 comments on commit 814e528

Please sign in to comment.