Skip to content

js: write PerCharacterEscaper.js #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (C) 2016-2023 DiffPlug
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.diffplug.selfie

import com.diffplug.selfie.PerCharacterEscaper.Companion.selfEscape
import com.diffplug.selfie.PerCharacterEscaper.Companion.specifiedEscape
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFails
import kotlin.test.assertSame

/**
 * Exercises both [PerCharacterEscaper] factories: `selfEscape`, where an escaped character is
 * written as the escape character followed by itself, and `specifiedEscape`, where it is written
 * as the escape character followed by an explicitly chosen replacement.
 */
class PerCharacterEscaperTest {
  @Test
  fun performanceOptimizationSelf() {
    val escaper = selfEscape("`123")
    // if nothing gets changed, it should return the exact same instance
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`1", escaper.escape("1"))
    assertEquals("``", escaper.escape("`"))
    assertEquals("abc`1`2`3``def", escaper.escape("abc123`def"))

    // in both directions
    assertEquals("1", escaper.unescape("`1"))
    assertEquals("`", escaper.unescape("``"))
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun performanceOptimizationSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // if nothing gets changed, it should return the exact same instance
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`b", escaper.escape("1"))
    assertEquals("`a", escaper.escape("`"))
    assertEquals("abc`b`c`d`adef", escaper.escape("abc123`def"))

    // in both directions
    assertEquals("1", escaper.unescape("`b"))
    assertEquals("`", escaper.unescape("`a"))
    // the exact inverse of the full-string escape asserted above
    assertEquals("abc123`def", escaper.unescape("abc`b`c`d`adef"))
    // an escape character followed by a character with no mapping passes that character through
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun cornerCasesSelf() {
    val escaper = selfEscape("`123")
    // corner case - escape character without follow-on will throw an error
    val exception = assertFails { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("a", escaper.unescape("`a"))
  }

  @Test
  fun cornerCasesSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // corner case - escape character without follow-on will throw an error
    val exception = assertFails { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("e", escaper.unescape("`e"))
  }

  @Test
  fun roundtrip() {
    val escaper = selfEscape("`<>")
    // escaping and then unescaping must give back the original text
    fun roundtrip(str: String) = assertEquals(str, escaper.unescape(escaper.escape(str)))
    roundtrip("")
    roundtrip("<local>~`/")
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,212 @@
*/
package com.diffplug.selfie

actual class PerCharacterEscaper {
actual fun escape(input: String): String = TODO()
actual fun unescape(input: String): String = TODO()
/**
* If your escape policy is "'123", it means this:
* ```
* abc->abc
* 123->'1'2'3
* I won't->I won''t
* ```
*/
actual class PerCharacterEscaper
/**
* The first character in the string will be used as the escape character, and all characters will
* be escaped.
*/
private constructor(
private val escapeCodePoint: Int,
private val escapedCodePoints: IntArray,
private val escapedByCodePoints: IntArray
) {
// Code points at or above this value are treated as two UTF-16 chars (see charCount).
val MIN_SUPPLEMENTARY_CODE_POINT = 0x010000
// Upper bound used by isValidCodePoint when checking that a code point is encodable.
val MAX_CODE_POINT = 0X10FFFF
// Base value that lowSurrogate adds the low 10 bits of a code point to.
val MIN_LOW_SURROGATE = '\uDC00'
// Base value that highSurrogate derives the leading surrogate from.
val MIN_HIGH_SURROGATE = '\uD800'
/**
 * Computes the leading (high) surrogate of the UTF-16 pair for [codePoint]. The caller is
 * expected to pass a code point that actually needs a surrogate pair.
 */
private fun highSurrogate(codePoint: Int): Char {
  // Folding the supplementary-plane offset into the surrogate base lets the result be produced
  // with a single shift and add.
  val bias = MIN_HIGH_SURROGATE.code - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10)
  return ((codePoint ushr 10) + bias).toChar()
}
/**
 * Computes the trailing (low) surrogate of the UTF-16 pair for [codePoint]: the low 10 bits of
 * the code point added to the low-surrogate base.
 */
private fun lowSurrogate(codePoint: Int): Char = MIN_LOW_SURROGATE + (codePoint and 0x3ff)
/**
 * Writes the surrogate pair for [codePoint] into [dst], high surrogate at [index] and low
 * surrogate at `index + 1`.
 */
private fun toSurrogates(codePoint: Int, dst: CharArray, index: Int) {
  val low = lowSurrogate(codePoint)
  val high = highSurrogate(codePoint)
  // The higher slot is written first so that a failure on it leaves dst untouched.
  dst[index + 1] = low
  dst[index] = high
}
/**
 * Encodes [codePoint] as UTF-16: one char for a BMP code point, a surrogate pair for any other
 * valid code point.
 *
 * @throws IllegalArgumentException if [codePoint] is not a valid Unicode code point
 */
private fun toChars(codePoint: Int): CharArray =
    when {
      isBmpCodePoint(codePoint) -> charArrayOf(codePoint.toChar())
      isValidCodePoint(codePoint) -> CharArray(2).also { pair -> toSurrogates(codePoint, pair, 0) }
      else -> throw IllegalArgumentException("Not a valid Unicode code point: $codePoint")
    }
/** Returns true when [codePoint] fits in a single UTF-16 char, i.e. lies in `0..0xFFFF`. */
private fun isBmpCodePoint(codePoint: Int): Boolean = codePoint in 0..0xFFFF
/**
 * Returns true when [codePoint] lies in `0..MAX_CODE_POINT`.
 *
 * Comparing 64K-sized planes handles both bounds at once: the unsigned shift turns any negative
 * input into a large plane number, so it fails the same check as a too-large value.
 */
private fun isValidCodePoint(codePoint: Int): Boolean {
  val planeCount = (MAX_CODE_POINT + 1) ushr 16
  return (codePoint ushr 16) < planeCount
}
/** Number of UTF-16 chars used to encode [codePoint]: 2 for supplementary code points, else 1. */
private fun charCount(codePoint: Int): Int =
    when {
      codePoint >= MIN_SUPPLEMENTARY_CODE_POINT -> 2
      else -> 1
    }
/**
 * Scans [input] one code point at a time and returns the UTF-16 offset of the first code point
 * listed in `escapedCodePoints`, or -1 when nothing in [input] needs escaping.
 */
private fun firstOffsetNeedingEscape(input: String): Int {
  var cursor = 0
  while (cursor < input.length) {
    val current = codePointAt(input, cursor)
    if (escapedCodePoints.any { it == current }) {
      return cursor
    }
    cursor += charCount(current)
  }
  return -1
}
/**
 * Escapes [input] according to this escaper's policy: every code point listed in
 * `escapedCodePoints` is replaced by the escape code point followed by the matching entry of
 * `escapedByCodePoints`.
 *
 * When nothing needs escaping, the very same [input] instance is returned.
 */
actual fun escape(input: String): String {
  val firstEscaped = firstOffsetNeedingEscape(input)
  if (firstEscaped == -1) {
    return input
  }
  val end = input.length
  val tailLength = end - firstEscaped
  // Capacity guess: the untouched prefix, plus the tail with some headroom for added escapes.
  val out = StringBuilder(firstEscaped + 4 + tailLength * 5 / 4)
  // Everything before the first hit is copied through unchanged.
  out.append(input, 0, firstEscaped)
  var cursor = firstEscaped
  while (cursor < end) {
    val current = codePointAt(input, cursor)
    cursor += charCount(current)
    when (val slot = indexOf(escapedCodePoints, current)) {
      -1 -> out.append(toChars(current))
      else -> {
        out.append(toChars(escapeCodePoint))
        out.append(toChars(escapedByCodePoints[slot]))
      }
    }
  }
  return out.toString()
}
/**
 * Scans [input] one code point at a time and returns the UTF-16 offset of the first occurrence
 * of the escape code point, or -1 when [input] contains none.
 */
private fun firstOffsetNeedingUnescape(input: String): Int {
  var cursor = 0
  while (cursor < input.length) {
    val current = codePointAt(input, cursor)
    if (current == escapeCodePoint) {
      return cursor
    }
    cursor += charCount(current)
  }
  return -1
}
/**
 * Reverses [escape]: each escape code point is dropped and the code point that follows it is
 * mapped back through `escapedByCodePoints` -> `escapedCodePoints`. A following code point that
 * has no mapping is emitted as-is.
 *
 * When [input] contains no escape code point, the very same [input] instance is returned.
 *
 * @throws IllegalArgumentException if the escape code point is the last thing in [input]
 */
actual fun unescape(input: String): String {
  val noEscapes = firstOffsetNeedingUnescape(input)
  if (noEscapes == -1) {
    return input
  }
  val length = input.length
  val needsEscapes = length - noEscapes
  val builder = StringBuilder(noEscapes + 4 + needsEscapes * 5 / 4)
  // Everything before the first escape code point is copied through unchanged.
  builder.append(input, 0, noEscapes)
  var offset = noEscapes
  while (offset < length) {
    var codepoint = codePointAt(input, offset)
    offset += charCount(codepoint)
    // an escape code point means the next code point has to be unescaped
    if (codepoint == escapeCodePoint) {
      if (offset >= length) {
        throw IllegalArgumentException(
            "Escape character '" +
                toChars(escapeCodePoint).concatToString() +
                "' can't be the last character in a string.")
      }
      val escapedBy = codePointAt(input, offset)
      // Advance by the width of what was actually read from the input. The mapped code point
      // may have a different UTF-16 width, so it must not drive the offset.
      offset += charCount(escapedBy)
      val idx = indexOf(escapedByCodePoints, escapedBy)
      codepoint = if (idx != -1) escapedCodePoints[idx] else escapedBy
    }
    // append either the unescaped code point or the raw one
    builder.append(toChars(codepoint))
  }
  return builder.toString()
}

actual companion object {
actual fun selfEscape(escapePolicy: String): PerCharacterEscaper = TODO()
actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper = TODO()
/** Returns the index of the first element of [arr] equal to [target], or -1 if there is none. */
private fun indexOf(arr: IntArray, target: Int): Int = arr.indexOf(target)
/**
 * Returns the code point of [value] that starts at UTF-16 index [offset], by calling
 * JavaScript's `String.prototype.codePointAt` through an inline `js(...)` snippet.
 *
 * The snippet refers to the parameters by name, so `value` and `offset` must keep their names.
 * NOTE(review): the JS call yields `undefined` for an out-of-range offset; every caller in this
 * file checks `offset < length` first.
 */
private fun codePointAt(value: String, offset: Int): Int {
val codePoint = js("value.codePointAt(offset)")
return codePoint
}
/**
 * Splits [value] into its Unicode code points, in order.
 *
 * A code point outside the Basic Multilingual Plane occupies two UTF-16 chars, so the scan
 * advances by two for those. Stepping by one would record the trailing surrogate as an extra,
 * bogus entry after the real code point.
 */
private fun codePoints(value: String): IntArray {
  val result = mutableListOf<Int>()
  var offset = 0
  while (offset < value.length) {
    val codepoint = codePointAt(value, offset)
    result.add(codepoint)
    // 0x10000 is the first code point that is encoded as a surrogate pair
    offset += if (codepoint >= 0x10000) 2 else 1
  }

  return result.toIntArray()
}

/**
 * Builds an escaper in which every escaped character is written as the escape character followed
 * by the character itself. The first character of [escapePolicy] is the escape character, and
 * every character of the policy (the escape character included) gets escaped.
 *
 * With the policy "'123":
 * ```
 * abc->abc
 * 123->'1'2'3
 * I won't->I won''t
 * ```
 */
actual fun selfEscape(escapePolicy: String): PerCharacterEscaper {
  val policy = codePoints(escapePolicy)
  // The same array serves as both "escaped" and "escaped by": each character stands for itself.
  return PerCharacterEscaper(
      escapeCodePoint = policy[0], escapedCodePoints = policy, escapedByCodePoints = policy)
}

/**
 * Builds an escaper from a policy made of (escaped, escaped-by) character pairs. The first
 * character of [escapePolicy] is the escape character, and each escaped character is written as
 * the escape character followed by its partner.
 *
 * With the policy "'a1b2c3d":
 * ```
 * abc->abc
 * 123->'b'c'd
 * I won't->I won'at
 * ```
 *
 * @throws IllegalArgumentException if the policy does not split evenly into pairs
 */
actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper {
  val policy = codePoints(escapePolicy)
  require(policy.size % 2 == 0)
  val pairCount = policy.size / 2
  // Even positions hold the characters to escape, odd positions hold their replacements.
  val escaped = IntArray(pairCount) { pair -> policy[2 * pair] }
  val escapedBy = IntArray(pairCount) { pair -> policy[2 * pair + 1] }
  return PerCharacterEscaper(policy[0], escaped, escapedBy)
}
}
}
Loading