diff --git a/src/org/openstreetmap/josm/tools/AlphanumComparator.java b/src/org/openstreetmap/josm/tools/AlphanumComparator.java index 318784358c0..a49c7595e70 100644 --- a/src/org/openstreetmap/josm/tools/AlphanumComparator.java +++ b/src/org/openstreetmap/josm/tools/AlphanumComparator.java @@ -35,9 +35,6 @@ import java.util.Arrays; import java.util.Comparator; -import org.openstreetmap.josm.gui.MainApplication; -import org.openstreetmap.josm.spi.lifecycle.Lifecycle; - /** * The Alphanum Algorithm is an improved sorting algorithm for strings * containing numbers: Instead of sorting numbers in ASCII order like a standard @@ -51,13 +48,20 @@ * */ public final class AlphanumComparator implements Comparator, Serializable { + /** {@code true} to use the faster ASCII sorting algorithm. Set to {@code false} when testing compatibility. */ + static boolean useFastASCIISort = true; + /** + * The sort order for the fast ASCII sort method. + */ + static final String ASCII_SORT_ORDER = + " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; private static final long serialVersionUID = 1L; private static final AlphanumComparator INSTANCE = new AlphanumComparator(); /** * A mapping from ASCII characters to the default {@link Collator} order. - * At writing, the default rules can be found in {@link sun.util.locale.provider.CollationRules#DEFAULTRULES}. + * At writing, the default rules can be found in CollationRules#DEFAULTRULES. */ private static final byte[] ASCII_MAPPING = new byte[128]; static { @@ -70,9 +74,8 @@ public final class AlphanumComparator implements Comparator, Serializabl // We have 37 order overrides for symbols; ASCII tables has control characters through 31. 32-47 are symbols. // After the symbols, we have 0-9, and then aA-zZ. // The character order - final String order = " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ"; - for (int i = 0; i < order.length(); i++) { - char c = order.charAt(i); + for (int i = 0; i < ASCII_SORT_ORDER.length(); i++) { + char c = ASCII_SORT_ORDER.charAt(i); ASCII_MAPPING[c] = (byte) (i + 1); } } @@ -100,15 +103,28 @@ private AlphanumComparator() { * @return See {@link String#compareToIgnoreCase(String)} (e.g. {@code string1.compareToIgnoreCase(string2)}). */ private static int compareString(String string1, int len1, String string2, int len2) { - int lim = Math.min(len1, len2); - int k = 0; - while (k < lim) { - final int c1 = ASCII_MAPPING[string1.charAt(k)]; - final int c2 = ASCII_MAPPING[string2.charAt(k)]; + int loc1 = 0; + int loc2 = 0; + while (loc1 < len1 && loc2 < len2) { + // Ignore control symbols + while (loc1 < len1 - 1 && string1.charAt(loc1) <= 32) { + loc1++; + } + while (loc2 < len2 - 1 && string2.charAt(loc2) <= 32) { + loc2++; + } + if (loc1 >= len1 || loc2 >= len2) break; + + char lower1 = Character.toLowerCase(string1.charAt(loc1)); + char lower2 = Character.toLowerCase(string2.charAt(loc2)); + + final int c1 = ASCII_MAPPING[lower1]; + final int c2 = ASCII_MAPPING[lower2]; if (c1 != c2) { return c1 - c2; } - k++; + loc1++; + loc2++; } return len1 - len2; } @@ -184,9 +200,8 @@ private static int compareChunk(String thisChunk, int thisChunkLength, String th } } } else { - // Check if both chunks are ascii only - // FIXME: re-enable once #23471 is fixed (the exception at startup keeps JOSM from finishing startup) - if (false && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) { + // Check if both chunks are ascii only; if so, use a much faster sorting algorithm. + if (useFastASCIISort && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) { return Utils.clamp(compareString(thisChunk, thisChunkLength, thatChunk, thatChunkLength), -1, 1); } // Instantiate the collator diff --git a/test/unit/org/openstreetmap/josm/tools/AlphanumComparatorTest.java b/test/unit/org/openstreetmap/josm/tools/AlphanumComparatorTest.java index 4f42cdd9673..9e0a7fe7cbc 100644 --- a/test/unit/org/openstreetmap/josm/tools/AlphanumComparatorTest.java +++ b/test/unit/org/openstreetmap/josm/tools/AlphanumComparatorTest.java @@ -1,17 +1,27 @@ // License: GPL. For details, see LICENSE file. package org.openstreetmap.josm.tools; +import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.text.Collator; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.function.BiFunction; +import java.util.stream.Stream; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; /** * Unit tests of {@link AlphanumComparator}. */ class AlphanumComparatorTest { + @AfterEach + void teardown() { + AlphanumComparator.useFastASCIISort = true; + } /** * Test numeric strings. @@ -32,4 +42,76 @@ void testMixed() { lst.sort(AlphanumComparator.getInstance()); assertEquals(Arrays.asList("a5", "a100", "a00999", "b1", "b20"), lst); } + + private static Stream testNonRegression23471Arguments() { + List testStrings = Arrays.asList( + "AMEN", + "Ameriabank", + "America First Credit Union", + "BAC Credomatic", + "BADR Banque", + "BAI", + "Banca Popolare di Cividale", + "Banca Popolare di Sondrio", + "Banca Sella", + "Banca Transilvania", + "Bancaribe", + "BancaStato", + "Banco Agrario", + "Banco AV Villas", + "Banco Azteca", + "Banco Bicentenario", + "Banco BISA", + "Banco BMG", + "Banco BPI (Portugal)", + "Banco BPM", + "Banco Caja Social", + "Banco Ciudad", + "Banco Continental (Paraguay)", + "Banco di Sardegna" + ); + List testChars = new ArrayList<>(AlphanumComparator.ASCII_SORT_ORDER.length()); + for (char c : AlphanumComparator.ASCII_SORT_ORDER.toCharArray()) { + testChars.add(Character.toString(c)); + } + BiFunction, String, List> subList = (list, string) -> list.subList(list.indexOf(string), list.size()); + return Stream.concat( + testStrings.stream().flatMap(first -> subList.apply(testStrings, first).stream().map(second -> new String[]{first, second})), + testChars.stream().flatMap(first -> subList.apply(testChars, first).stream().map(second -> new String[]{first, second})) + ); + } + + /** + * Non-regression test for #23471 + * This ensures that the comparison contract holds. + * There are ~5300 combinations run in <1s (as of 2024-02-14). + */ + @Test + void testNonRegression23471() { + assertAll(testNonRegression23471Arguments().map(strings -> () -> testNonRegression23471(strings[0], strings[1]))); + } + + private static void testNonRegression23471(String first, String second) { + AlphanumComparator.useFastASCIISort = true; + final AlphanumComparator instance = AlphanumComparator.getInstance(); + assertEquals(-instance.compare(first, second), instance.compare(second, first)); + // Ensure that the fast sort is equivalent to the slow sort + AlphanumComparator.useFastASCIISort = false; + final int slowFirstSecond = instance.compare(first, second); + final int slowSecondFirst = instance.compare(second, first); + AlphanumComparator.useFastASCIISort = true; + final int fastFirstSecond = instance.compare(first, second); + final int fastSecondFirst = instance.compare(second, first); + assertEquals(slowFirstSecond, fastFirstSecond); + assertEquals(slowSecondFirst, fastSecondFirst); + + final Collator collator = Collator.getInstance(); + collator.setStrength(Collator.SECONDARY); + // Check against the collator instance + assertEquals(Utils.clamp(collator.compare(first, second), -1, 1), + Utils.clamp(instance.compare(first, second), -1, 1)); + assertEquals(Utils.clamp(collator.compare(second, first), -1, 1), + Utils.clamp(instance.compare(second, first), -1, 1)); + } + }