From a5d515d7196054eeeb61c386e6e89cf670b41b26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kenneth=20Waldenstr=C3=B8m?= Date: Thu, 11 May 2023 12:27:32 +0200 Subject: [PATCH] Fix encoding problem when CodedCharacterSet is set to "ESC - A" According to https://en.wikipedia.org/wiki/ISO/IEC_2022#cite_note-14.3.2-90 ISO-8859-1 should be used both when CodedCharacterSet is ESC % A ESC . A ESC - A Previously, only the first two syntaxes were supported. --- Source/com/drew/metadata/iptc/Iso2022Converter.java | 4 ++++ Tests/com/drew/metadata/iptc/Iso2022ConverterTest.java | 1 + 2 files changed, 5 insertions(+) diff --git a/Source/com/drew/metadata/iptc/Iso2022Converter.java b/Source/com/drew/metadata/iptc/Iso2022Converter.java index 4db29851b..3b6b5f14a 100644 --- a/Source/com/drew/metadata/iptc/Iso2022Converter.java +++ b/Source/com/drew/metadata/iptc/Iso2022Converter.java @@ -37,6 +37,7 @@ public final class Iso2022Converter private static final int DOT = 0xe280a2; private static final byte LATIN_CAPITAL_G = 0x47; private static final byte PERCENT_SIGN = 0x25; + private static final byte MINUS_SIGN = 0x2D; private static final byte DOT_SIGN = 0x2E; private static final byte ESC = 0x1B; @@ -59,6 +60,9 @@ public static String convertISO2022CharsetToJavaCharset(@NotNull final byte[] by if (bytes.length > 3 && bytes[0] == ESC && (bytes[3] & 0xFF | ((bytes[2] & 0xFF) << 8) | ((bytes[1] & 0xFF) << 16)) == DOT && bytes[4] == LATIN_CAPITAL_A) return ISO_8859_1; + if (bytes.length > 2 && bytes[0] == ESC && bytes[1] == MINUS_SIGN && bytes[2] == LATIN_CAPITAL_A) + return ISO_8859_1; + return null; } diff --git a/Tests/com/drew/metadata/iptc/Iso2022ConverterTest.java b/Tests/com/drew/metadata/iptc/Iso2022ConverterTest.java index 04caa0705..90ee2368d 100644 --- a/Tests/com/drew/metadata/iptc/Iso2022ConverterTest.java +++ b/Tests/com/drew/metadata/iptc/Iso2022ConverterTest.java @@ -32,5 +32,6 @@ public void testConvertISO2022CharsetToJavaCharset() throws Exception assertEquals("UTF-8", 
Iso2022Converter.convertISO2022CharsetToJavaCharset(new byte[]{0x1B, 0x25, 0x47})); assertEquals("ISO-8859-1", Iso2022Converter.convertISO2022CharsetToJavaCharset(new byte[]{0x1B, 0x2E, 0x41})); assertEquals("ISO-8859-1", Iso2022Converter.convertISO2022CharsetToJavaCharset(new byte[]{0x1B, (byte)0xE2, (byte)0x80, (byte)0xA2, 0x41})); + assertEquals("ISO-8859-1", Iso2022Converter.convertISO2022CharsetToJavaCharset(new byte[]{0x1B, (byte)0x2D, (byte)0x41})); } }