Skip to content

Commit 8013b6e

Browse files
committed
✂️ initial commit
1 parent 175b69a commit 8013b6e

10 files changed

+395
-0
lines changed

.editorconfig

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# editorconfig.org
2+
root = true
3+
4+
[*]
5+
indent_style = space
6+
indent_size = 2
7+
end_of_line = lf
8+
charset = utf-8
9+
trim_trailing_whitespace = true
10+
insert_final_newline = true
11+
12+
[*.md]
13+
trim_trailing_whitespace = false
14+
15+
[*.txt]
16+
trim_trailing_whitespace = false

.eslintignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
node_modules/

.eslintrc

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"extends": ["standard"],
3+
"plugins": ["mocha"],
4+
"env": {
5+
"mocha": true
6+
}
7+
}

.gitignore

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
6+
# Runtime data
7+
pids
8+
*.pid
9+
*.seed
10+
11+
# Directory for instrumented libs generated by jscoverage/JSCover
12+
lib-cov
13+
14+
# Coverage directory used by tools like istanbul
15+
coverage
16+
17+
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
18+
.grunt
19+
20+
# node-waf configuration
21+
.lock-wscript
22+
23+
# Compiled binary addons (http://nodejs.org/api/addons.html)
24+
build/Release
25+
26+
# Dependency directory
27+
node_modules
28+
29+
# Optional npm cache directory
30+
.npm
31+
32+
# Optional REPL history
33+
.node_repl_history

.travis.yml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
language: node_js
2+
node_js:
3+
- "4.0.0"

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2016 Justin Sippel, Vitaly Domnikov
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

index.js

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
'use strict'
2+
3+
// UTF-16 high (leading) surrogate code unit range
const HIGH_SURROGATE_START = 0xD800
const HIGH_SURROGATE_END = 0xDBFF

// First UTF-16 low (trailing) surrogate code unit
const LOW_SURROGATE_START = 0xDC00

// Regional indicator symbols; two of them together form a country flag
const REGIONAL_INDICATOR_START = 0x1F1E6
const REGIONAL_INDICATOR_END = 0x1F1FF

// Fitzpatrick skin-tone modifiers
const FITZPATRICK_MODIFIER_START = 0x1F3FB
const FITZPATRICK_MODIFIER_END = 0x1F3FF

// Variation selectors
const VARIATION_MODIFIER_START = 0xFE00
const VARIATION_MODIFIER_END = 0xFE0F
16+
17+
// Split `string` into an array of substrings, one entry per character.
// The width of each entry (in UTF-16 code units) is decided by nextUnits.
// Throws an Error when the argument is not a string.
function runes (string) {
  if (typeof string !== 'string') {
    throw new Error('string cannot be undefined or null')
  }
  const pieces = []
  // `width` is recomputed at every position, so the cursor can advance by
  // one or several code units per iteration
  for (let start = 0, width = 0; start < string.length; start += width) {
    width = nextUnits(start, string)
    pieces.push(string.substring(start, start + width))
  }
  return pieces
}
31+
32+
// Decide how many code units make up the character that starts at index i.
//   BMP character: 1 code unit
//   BMP character followed by a variation selector: 2 code units
//   Non-BMP character (represented by a surrogate pair): 2 code units
//   Surrogate pair followed by a variation selector: 3 code units
//   Emoji with skin-tone modifier: 4 code units (2 code points)
//   Country flag: 4 code units (2 code points)
//
// i      - index of the first code unit of the character
// string - the string being split
// Returns the number of code units to take, starting at i.
function nextUnits (i, string) {
  const current = string[i]

  // If we have a variation selector at the next position, handle both
  // code units as one character
  if (isVariationSelector(string[i + 1])) {
    return 2
  }

  // If we don't have a value that is part of a surrogate pair, or we're at
  // the end, only take the value at i
  if (!isFirstOfSurrogatePair(current) || i === string.length - 1) {
    return 1
  }

  const currentPair = current + string[i + 1]
  // A surrogate pair is exactly two code units wide, so look at the two
  // code units that follow the current pair
  const nextPair = string.substring(i + 2, i + 4)

  // Country flags are comprised of two regional indicator symbols,
  // each represented by a surrogate pair.
  // See http://emojipedia.org/flags/
  // If both pairs are regional indicator symbols, take 4
  if (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair)) {
    return 4
  }

  // If the next pair makes a Fitzpatrick skin tone modifier, take 4
  // See http://emojipedia.org/modifiers/
  // Technically, only some code points are meant to be combined with the
  // skin tone modifiers. This function does not check the current pair to
  // see if it is one of them.
  if (isFitzpatrickModifier(nextPair)) {
    return 4
  }

  // A variation selector can also follow a surrogate pair
  // (e.g. U+1F170 U+FE0F); keep it attached instead of emitting it as a
  // separate one-unit character
  if (isVariationSelector(string[i + 2])) {
    return 3
  }

  return 2
}
76+
77+
// Report whether `string` starts with a high (leading) surrogate code unit.
// Always returns a boolean: anything that is not a string, as well as the
// empty string, yields false (charCodeAt(0) of '' is NaN, which is never
// inside the range).
function isFirstOfSurrogatePair (string) {
  return typeof string === 'string' &&
    betweenInclusive(string.charCodeAt(0), HIGH_SURROGATE_START, HIGH_SURROGATE_END)
}
80+
81+
// Decode the first two code units of `string` as a surrogate pair and report
// whether the resulting code point lies in the regional indicator range.
function isRegionalIndicator (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return betweenInclusive(codePoint, REGIONAL_INDICATOR_START, REGIONAL_INDICATOR_END)
}
84+
85+
// Decode the first two code units of `string` as a surrogate pair and report
// whether the resulting code point lies in the Fitzpatrick modifier range.
function isFitzpatrickModifier (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return betweenInclusive(codePoint, FITZPATRICK_MODIFIER_START, FITZPATRICK_MODIFIER_END)
}
88+
89+
// Report whether the first code unit of `string` is a variation selector.
// Any non-string argument (for example the undefined produced by indexing
// past the end of a string) yields false.
function isVariationSelector (string) {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return betweenInclusive(unit, VARIATION_MODIFIER_START, VARIATION_MODIFIER_END)
}
92+
93+
// Combine the first two code units of `pair` into a single code point using
// the UTF-16 surrogate formula. The input is not validated: when `pair` has
// fewer than two code units the second charCodeAt yields NaN and so does the
// result.
function codePointFromSurrogatePair (pair) {
  const high = pair.charCodeAt(0)
  const low = pair.charCodeAt(1)
  return ((high - HIGH_SURROGATE_START) << 10) + (low - LOW_SURROGATE_START) + 0x10000
}
98+
99+
// Report whether lower <= value <= upper, with both bounds included.
// A NaN value is never inside the range.
function betweenInclusive (value, lower, upper) {
  return lower <= value && upper >= value
}
102+
103+
module.exports = runes

package.json

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"name": "runes",
3+
"version": "0.0.0",
4+
"description": "Unicode-aware JS string splitting",
5+
"main": "index.js",
6+
"scripts": {
7+
"test": "eslint . && mocha test"
8+
},
9+
"repository": {
10+
"type": "git",
11+
"url": "git+ssh://git@github.com/dotcypress/runes.git"
12+
},
13+
"keywords": [
14+
"unicode",
15+
"runes",
16+
"split",
17+
"split string"
18+
],
19+
"author": "Vitaly Domnikov <[email protected]>",
20+
"license": "MIT",
21+
"bugs": {
22+
"url": "https://github.com/dotcypress/runes/issues"
23+
},
24+
"homepage": "https://github.com/dotcypress/runes#readme",
25+
"engines": {
26+
"node": ">=4.0.0"
27+
},
28+
"files": [
29+
"index.js"
30+
],
31+
"devDependencies": {
32+
"eslint": "^3.3.1",
33+
"eslint-config-standard": "^5.3.1",
34+
"eslint-plugin-mocha": "^4.3.0",
35+
"eslint-plugin-promise": "^2.0.1",
36+
"eslint-plugin-standard": "^2.0.0",
37+
"mocha": "^3.0.2",
38+
"should": "^11.1.0"
39+
}
40+
}

readme.md

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# ✂️ Runes
2+
[![Build Status](https://img.shields.io/travis/dotcypress/runes.svg?branch=master&style=flat-square)](https://travis-ci.org/dotcypress/runes)
3+
[![NPM Version](https://img.shields.io/npm/v/runes.svg?style=flat-square)](https://www.npmjs.com/package/runes)
4+
5+
Unicode-aware JS string splitting
6+
7+
Split a string into its constituent characters, without munging emoji and other non-BMP code points.
8+
9+
## Why?
10+
11+
The native `String#split` implementation does not pay attention to [surrogate pairs](http://en.wikipedia.org/wiki/UTF-16). When the code units of a surrogate pair are split apart, they are not intelligible on their own. Unless they are put back together in the correct order, individual code units will cause problems in code that handles strings.
12+
13+
## Installation
14+
15+
```sh
16+
$ npm install runes
17+
```
18+
19+
## Example
20+
21+
```js
22+
23+
const runes = require('runes')
24+
25+
const example = 'Emoji 🤖'
26+
27+
example.split('') // ["E", "m", "o", "j", "i", " ", "�", "�"]
28+
runes(example) // ["E", "m", "o", "j", "i", " ", "🤖"]
29+
30+
```
31+
32+
## License
33+
34+
The MIT License (MIT)
35+
36+
Copyright (c) 2016 Justin Sippel, Vitaly Domnikov
37+
38+
Permission is hereby granted, free of charge, to any person obtaining a copy
39+
of this software and associated documentation files (the "Software"), to deal
40+
in the Software without restriction, including without limitation the rights
41+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
42+
copies of the Software, and to permit persons to whom the Software is
43+
furnished to do so, subject to the following conditions:
44+
45+
The above copyright notice and this permission notice shall be included in all
46+
copies or substantial portions of the Software.
47+
48+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
49+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
50+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
53+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54+
SOFTWARE.
55+

0 commit comments

Comments
 (0)