|
'use strict'

// UTF-16 high (leading) surrogate code-unit range.
const HIGH_SURROGATE_START = 0xD800
const HIGH_SURROGATE_END = 0xDBFF

// First code unit of the UTF-16 low (trailing) surrogate range.
const LOW_SURROGATE_START = 0xDC00

// Regional indicator symbols (code points); two of them form a country flag.
const REGIONAL_INDICATOR_START = 0x1F1E6
const REGIONAL_INDICATOR_END = 0x1F1FF

// Fitzpatrick skin-tone modifiers (code points).
const FITZPATRICK_MODIFIER_START = 0x1F3FB
const FITZPATRICK_MODIFIER_END = 0x1F3FF

// Variation selectors VS1-VS16 (single BMP code units).
const VARIATION_MODIFIER_START = 0xFE00
const VARIATION_MODIFIER_END = 0xFE0F
| 16 | + |
/**
 * Split a string into an array of "runes": substrings that each hold one
 * visible character, as delimited by nextUnits (surrogate pairs, flags,
 * skin-tone and variation sequences stay together).
 *
 * @param {string} string - Text to split.
 * @returns {string[]} The consecutive substrings, in order; concatenating
 *   them yields the input. An empty string gives an empty array.
 * @throws {Error} If `string` is not a string. Note that this fires for any
 *   non-string value, not only undefined/null as the message suggests.
 */
function runes (string) {
  if (typeof string !== 'string') {
    throw new Error('string cannot be undefined or null')
  }
  const result = []
  let i = 0
  while (i < string.length) {
    // Number of UTF-16 code units belonging to the rune that starts at i.
    const increment = nextUnits(i, string)
    result.push(string.substring(i, i + increment))
    i += increment
  }
  return result
}
| 31 | + |
// Decide how many code units make up the character starting at index i.
// BMP characters: 1 code unit
// Non-BMP characters (represented by surrogate pairs): 2 code units
// Emoji with skin-tone modifiers: 4 code units (2 code points)
// Country flags: 4 code units (2 code points)
// Variations: 2 code units (BMP base + selector) or
//             3 code units (surrogate pair + selector)
function nextUnits (i, string) {
  const current = string[i]

  // A variation selector directly after the current unit is kept with it.
  if (isVariationSelector(string[i + 1])) {
    return 2
  }

  // If we don't have a value that is part of a surrogate pair, or we're at
  // the end, only take the value at i
  if (!isFirstOfSurrogatePair(current) || i === string.length - 1) {
    return 1
  }

  const currentPair = string.substring(i, i + 2)
  // Only the first two code units of the following text are inspected.
  const nextPair = string.substring(i + 2, i + 4)

  // Country flags are comprised of two regional indicator symbols,
  // each represented by a surrogate pair.
  // See http://emojipedia.org/flags/
  // If both pairs are regional indicator symbols, take 4
  if (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair)) {
    return 4
  }

  // If the next pair makes a Fitzpatrick skin tone modifier, take 4.
  // See http://emojipedia.org/modifiers/
  // Technically, only some code points are meant to be combined with the
  // skin tone modifiers. This function does not check the current pair to
  // see if it is one of them.
  if (isFitzpatrickModifier(nextPair)) {
    return 4
  }

  // A variation selector that follows a surrogate pair (for example
  // U+1F170 U+FE0F) belongs to that pair; without this check the selector
  // would be emitted as a separate one-unit rune.
  if (isVariationSelector(string[i + 2])) {
    return 3
  }

  return 2
}
| 76 | + |
/**
 * Tell whether the first code unit of `string` is a UTF-16 high (leading)
 * surrogate.
 *
 * @param {string} string - Text whose first code unit is examined.
 * @returns {boolean} true when the first code unit lies in
 *   [HIGH_SURROGATE_START, HIGH_SURROGATE_END]; false for an empty string
 *   or a non-string value.
 */
function isFirstOfSurrogatePair (string) {
  // The explicit type/length guard guarantees a real boolean result instead
  // of leaking '' or undefined, and mirrors isVariationSelector.
  return typeof string === 'string' &&
    string.length > 0 &&
    betweenInclusive(string.charCodeAt(0), HIGH_SURROGATE_START, HIGH_SURROGATE_END)
}
| 80 | + |
/**
 * Tell whether the surrogate pair at the start of `string` decodes to a
 * regional indicator symbol.
 *
 * @param {string} string - Text whose first two code units are decoded.
 * @returns {boolean} true when the decoded code point lies in
 *   [REGIONAL_INDICATOR_START, REGIONAL_INDICATOR_END].
 */
function isRegionalIndicator (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return betweenInclusive(codePoint, REGIONAL_INDICATOR_START, REGIONAL_INDICATOR_END)
}
| 84 | + |
/**
 * Tell whether the surrogate pair at the start of `string` decodes to a
 * Fitzpatrick skin-tone modifier.
 *
 * @param {string} string - Text whose first two code units are decoded.
 * @returns {boolean} true when the decoded code point lies in
 *   [FITZPATRICK_MODIFIER_START, FITZPATRICK_MODIFIER_END].
 */
function isFitzpatrickModifier (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return betweenInclusive(codePoint, FITZPATRICK_MODIFIER_START, FITZPATRICK_MODIFIER_END)
}
| 88 | + |
/**
 * Tell whether the first code unit of `string` is a variation selector.
 *
 * @param {*} string - Value to test; callers may pass undefined when
 *   indexing past the end of a string.
 * @returns {boolean} true when `string` is a string whose first code unit
 *   lies in [VARIATION_MODIFIER_START, VARIATION_MODIFIER_END]; false for
 *   any non-string value.
 */
function isVariationSelector (string) {
  if (typeof string !== 'string') {
    return false
  }
  return betweenInclusive(string.charCodeAt(0), VARIATION_MODIFIER_START, VARIATION_MODIFIER_END)
}
| 92 | + |
/**
 * Combine the first two UTF-16 code units of `pair` into one code point.
 *
 * The units are not validated as surrogates. If `pair` has fewer than two
 * code units, charCodeAt yields NaN and the result is NaN.
 *
 * @param {string} pair - Text starting with a high and a low surrogate.
 * @returns {number} The decoded code point, or NaN for short input.
 */
function codePointFromSurrogatePair (pair) {
  const highOffset = pair.charCodeAt(0) - HIGH_SURROGATE_START
  const lowOffset = pair.charCodeAt(1) - LOW_SURROGATE_START
  // The high surrogate carries the upper 10 bits, the low surrogate the
  // lower 10 bits; supplementary code points start at 0x10000.
  return (highOffset << 10) + lowOffset + 0x10000
}
| 98 | + |
/**
 * Tell whether `value` lies within the closed interval [lower, upper].
 *
 * @param {number} value - Number to test; NaN is never in range.
 * @param {number} lower - Inclusive lower bound.
 * @param {number} upper - Inclusive upper bound.
 * @returns {boolean} true when lower <= value <= upper.
 */
function betweenInclusive (value, lower, upper) {
  return value >= lower && value <= upper
}
| 102 | + |
// The module's single export is the runes function.
module.exports = runes
0 commit comments