diff --git a/scripts/unicode.py b/scripts/unicode.py index d5397ef..1be12d8 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -43,7 +43,7 @@ from itertools import batched from typing import Callable, Iterable -UNICODE_VERSION = "16.0.0" +UNICODE_VERSION = "17.0.0" """The version of the Unicode data files to download.""" NUM_CODEPOINTS = 0x110000 @@ -178,7 +178,9 @@ class WidthState(enum.IntEnum): (if set, should also set 3rd and 4th) - 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state where no ZWJ has been encountered yet; encountering one flips this on - - Seventh bit: is VS1 (if CJK) or is VS2 (not CJK) + - Seventh bit: + - CJK mode: is VS1 or VS3 + - Not CJK: is VS2 """ # BASIC WIDTHS @@ -275,8 +277,8 @@ class WidthState(enum.IntEnum): # VARIATION SELECTORS - VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000 - "\\uFE00 if CJK, or \\uFE01 otherwise" + VARIATION_SELECTOR_1_2_OR_3 = 0b0000_0010_0000_0000 + "\\uFE00 or \\uFE02 if CJK, or \\uFE01 otherwise" # Text presentation sequences (not CJK) VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000 @@ -373,7 +375,7 @@ def width_alone(self) -> int: | WidthState.COMBINING_LONG_SOLIDUS_OVERLAY | WidthState.VARIATION_SELECTOR_15 | WidthState.VARIATION_SELECTOR_16 - | WidthState.VARIATION_SELECTOR_1_OR_2 + | WidthState.VARIATION_SELECTOR_1_2_OR_3 ): return 0 case ( @@ -657,11 +659,12 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]: ea[cp] = width # East-Asian only - ea[0xFE00] = WidthState.VARIATION_SELECTOR_1_OR_2 ea[0x0338] = WidthState.COMBINING_LONG_SOLIDUS_OVERLAY + ea[0xFE00] = WidthState.VARIATION_SELECTOR_1_2_OR_3 + ea[0xFE02] = WidthState.VARIATION_SELECTOR_1_2_OR_3 # Not East Asian only - not_ea[0xFE01] = WidthState.VARIATION_SELECTOR_1_OR_2 + not_ea[0xFE01] = WidthState.VARIATION_SELECTOR_1_2_OR_3 not_ea[0xFE0E] = WidthState.VARIATION_SELECTOR_15 return (not_ea, ea) @@ -759,7 +762,7 @@ def load_solidus_transparent( num_chars = len(ccc_above_1) for cp in ccc_above_1: - if cp not in [0xFE00, 0xFE0F]: + if cp not in [0xFE00, 0xFE02, 0xFE0F]: assert ( cjk_width_map[cp].table_width() != CharWidthInTable.SPECIAL ), f"U+{cp:X}" @@ -1317,14 +1320,14 @@ def lookup_fns( if is_cjk: s += """ - if c == '\\u{FE00}' { - return (0, next_info.set_vs1_2()); + if matches!(c, '\\u{FE00}' | '\\u{FE02}') { + return (0, next_info.set_vs1_2_3()); } """ else: s += """ if c == '\\u{FE01}' { - return (0, next_info.set_vs1_2()); + return (0, next_info.set_vs1_2_3()); } if c == '\\u{FE0E}' { return (0, next_info.set_text_presentation()); @@ -1337,7 +1340,7 @@ def lookup_fns( } } else """ - s += """if next_info.is_vs1_2() { + s += """if next_info.is_vs1_2_3() { if matches!(c, '\\u{2018}' | '\\u{2019}' | '\\u{201C}' | '\\u{201D}') { return (""" @@ -1345,7 +1348,7 @@ def lookup_fns( s += """, WidthInfo::DEFAULT); } else { - next_info = next_info.unset_vs1_2(); + next_info = next_info.unset_vs1_2_3(); } } if next_info.is_ligature_transparent() { @@ -1655,7 +1658,7 @@ def emit_module( self.0 | WidthInfo::VARIATION_SELECTOR_16.0 & !WidthInfo::VARIATION_SELECTOR_15.0 - & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0, + & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0, ) }} else {{ Self::VARIATION_SELECTOR_16 @@ -1683,7 +1686,7 @@ def emit_module( self.0 | WidthInfo::VARIATION_SELECTOR_15.0 & !WidthInfo::VARIATION_SELECTOR_16.0 - & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0, + & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0, ) }} else {{ Self(WidthInfo::VARIATION_SELECTOR_15.0) @@ -1696,27 +1699,28 @@ def emit_module( }} /// Has 7th bit set - fn is_vs1_2(self) -> bool {{ - (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0 + fn is_vs1_2_3(self) -> bool {{ + (self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) + == WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0 }} /// Set 7th bit - fn set_vs1_2(self) -> Self {{ + fn set_vs1_2_3(self) -> Self {{ if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{ Self( self.0 - | WidthInfo::VARIATION_SELECTOR_1_OR_2.0 + | WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0 & !WidthInfo::VARIATION_SELECTOR_15.0 & !WidthInfo::VARIATION_SELECTOR_16.0, ) }} else {{ - Self(WidthInfo::VARIATION_SELECTOR_1_OR_2.0) + Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) }} }} /// Clear 7th bit - fn unset_vs1_2(self) -> Self {{ - Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0) + fn unset_vs1_2_3(self) -> Self {{ + Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) }} }} diff --git a/src/lib.rs b/src/lib.rs index bc2292c..4845e7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,8 +62,8 @@ //! - Outside of an East Asian context, [text presentation sequences] have width 1 if their base character: //! - Has the [`Emoji_Presentation`] property, and //! - Is not in the [Enclosed Ideographic Supplement] block. -//! - [`'\u{2018}'`, `'\u{2019}'`, `'\u{201C}'`, and `'\u{201D}'`][General Punctuation] always have width 1 when followed by '\u{FE00}', -//! and width 2 when followed by '\u{FE01}'. +//! - [`'\u{2018}'`, `'\u{2019}'`, `'\u{201C}'`, and `'\u{201D}'`][General Punctuation] always have width 1 +//! when followed by '\u{FE00}' or '\u{FE02}', and width 2 when followed by '\u{FE01}'. //! - Script-specific ligatures: //! - For all the following ligatures, the insertion of any number of [default-ignorable][`Default_Ignorable_Code_Point`] //! [combining marks] anywhere in the sequence will not change the total width. In addition, for all non-Arabic diff --git a/src/tables.rs b/src/tables.rs index 390087a..b3f921e 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -44,7 +44,7 @@ impl WidthInfo { const TAG_A6_END_ZWJ_EMOJI_PRESENTATION: Self = Self(0b0000000000011110); const KIRAT_RAI_VOWEL_SIGN_E: Self = Self(0b0000000000100000); const KIRAT_RAI_VOWEL_SIGN_AI: Self = Self(0b0000000000100001); - const VARIATION_SELECTOR_1_OR_2: Self = Self(0b0000001000000000); + const VARIATION_SELECTOR_1_2_OR_3: Self = Self(0b0000001000000000); const VARIATION_SELECTOR_15: Self = Self(0b0100000000000000); const VARIATION_SELECTOR_16: Self = Self(0b1000000000000000); const JOINING_GROUP_ALEF: Self = Self(0b0011000011111111); @@ -94,7 +94,7 @@ impl WidthInfo { self.0 | WidthInfo::VARIATION_SELECTOR_16.0 & !WidthInfo::VARIATION_SELECTOR_15.0 - & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0, + & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0, ) } else { Self::VARIATION_SELECTOR_16 @@ -122,7 +122,7 @@ impl WidthInfo { self.0 | WidthInfo::VARIATION_SELECTOR_15.0 & !WidthInfo::VARIATION_SELECTOR_16.0 - & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0, + & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0, ) } else { Self(WidthInfo::VARIATION_SELECTOR_15.0) @@ -135,33 +135,34 @@ impl WidthInfo { } /// Has 7th bit set - fn is_vs1_2(self) -> bool { - (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0 + fn is_vs1_2_3(self) -> bool { + (self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) + == WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0 } /// Set 7th bit - fn set_vs1_2(self) -> Self { + fn set_vs1_2_3(self) -> Self { if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK { Self( self.0 - | WidthInfo::VARIATION_SELECTOR_1_OR_2.0 + | WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0 & !WidthInfo::VARIATION_SELECTOR_15.0 & !WidthInfo::VARIATION_SELECTOR_16.0, ) } else { - Self(WidthInfo::VARIATION_SELECTOR_1_OR_2.0) + Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) } } /// Clear 7th bit - fn unset_vs1_2(self) -> Self { - Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0) + fn unset_vs1_2_3(self) -> Self { + Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0) } } /// The version of [Unicode](http://www.unicode.org/) /// that this version of unicode-width is based on. -pub const UNICODE_VERSION: (u8, u8, u8) = (16, 0, 0); +pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c` by /// consulting a multi-level lookup table. @@ -203,7 +204,7 @@ fn lookup_width(c: char) -> (u8, WidthInfo) { '\u{1A10}' => (1, WidthInfo::BUGINESE_LETTER_YA), '\u{2D31}'..='\u{2D6F}' => (1, WidthInfo::TIFINAGH_CONSONANT), '\u{A4FC}'..='\u{A4FD}' => (1, WidthInfo::LISU_TONE_LETTER_MYA_NA_JEU), - '\u{FE01}' => (0, WidthInfo::VARIATION_SELECTOR_1_OR_2), + '\u{FE01}' => (0, WidthInfo::VARIATION_SELECTOR_1_2_OR_3), '\u{FE0E}' => (0, WidthInfo::VARIATION_SELECTOR_15), '\u{FE0F}' => (0, WidthInfo::VARIATION_SELECTOR_16), '\u{10C03}' => (1, WidthInfo::OLD_TURKIC_LETTER_ORKHON_I), @@ -267,7 +268,7 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) { return (0, next_info.set_emoji_presentation()); } if c == '\u{FE01}' { - return (0, next_info.set_vs1_2()); + return (0, next_info.set_vs1_2_3()); } if c == '\u{FE0E}' { return (0, next_info.set_text_presentation()); @@ -278,11 +279,11 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) { } else { next_info = next_info.unset_text_presentation(); } - } else if next_info.is_vs1_2() { + } else if next_info.is_vs1_2_3() { if matches!(c, '\u{2018}' | '\u{2019}' | '\u{201C}' | '\u{201D}') { return (2, WidthInfo::DEFAULT); } else { - next_info = next_info.unset_vs1_2(); + next_info = next_info.unset_vs1_2_3(); } } if next_info.is_ligature_transparent() { @@ -515,7 +516,7 @@ fn lookup_width_cjk(c: char) -> (u8, WidthInfo) { '\u{1A10}' => (1, WidthInfo::BUGINESE_LETTER_YA), '\u{2D31}'..='\u{2D6F}' => (1, WidthInfo::TIFINAGH_CONSONANT), '\u{A4FC}'..='\u{A4FD}' => (1, WidthInfo::LISU_TONE_LETTER_MYA_NA_JEU), - '\u{FE00}' => (0, WidthInfo::VARIATION_SELECTOR_1_OR_2), + '\u{FE00}'..='\u{FE02}' => (0, WidthInfo::VARIATION_SELECTOR_1_2_OR_3), '\u{FE0F}' => (0, WidthInfo::VARIATION_SELECTOR_16), '\u{10C03}' => (1, WidthInfo::OLD_TURKIC_LETTER_ORKHON_I), '\u{16D67}' => (1, WidthInfo::KIRAT_RAI_VOWEL_SIGN_E), @@ -586,14 +587,14 @@ fn width_in_str_cjk(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) { if c == '\u{FE0F}' { return (0, next_info.set_emoji_presentation()); } - if c == '\u{FE00}' { - return (0, next_info.set_vs1_2()); + if matches!(c, '\u{FE00}' | '\u{FE02}') { + return (0, next_info.set_vs1_2_3()); } - if next_info.is_vs1_2() { + if next_info.is_vs1_2_3() { if matches!(c, '\u{2018}' | '\u{2019}' | '\u{201C}' | '\u{201D}') { return (1, WidthInfo::DEFAULT); } else { - next_info = next_info.unset_vs1_2(); + next_info = next_info.unset_vs1_2_3(); } } if next_info.is_ligature_transparent() { @@ -1042,13 +1043,13 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ 0x02, 0x02, 0x02, 0x50, 0x02, 0x51, 0x52, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x53, 0x54, 0x02, 0x02, 0x55, 0x02, 0x56, 0x02, 0x02, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x02, 0x64, 0x65, - 0x66, 0x67, 0x02, 0x68, 0x02, 0x69, 0x6A, 0x6B, 0x6C, 0x02, 0x02, 0x6D, 0x6E, 0x6F, 0x70, - 0x02, 0x71, 0x72, 0x02, + 0x66, 0x67, 0x02, 0x68, 0x02, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x02, 0x6E, 0x6F, 0x70, 0x71, + 0x02, 0x72, 0x73, 0x02, ], [ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x73, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x74, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ], @@ -1060,15 +1061,15 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ 0x02, 0x02, 0x02, 0x02, ], [ - 0x02, 0x02, 0x74, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x75, 0x76, 0x02, 0x02, 0x02, 0x77, 0x02, 0x02, 0x02, - 0x78, 0x79, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0x02, 0x02, 0x75, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x76, 0x77, 0x02, 0x02, 0x02, 0x78, 0x79, 0x02, 0x02, + 0x7A, 0x7B, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, ], [ 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, - 0x7A, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x7B, 0x7C, 0x02, 0x02, 0x02, + 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x7C, 0x7D, 0x7E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, @@ -1076,22 +1077,22 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ [ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x7D, 0x39, 0x39, 0x7E, 0x39, 0x39, 0x7F, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, 0x02, 0x02, + 0x02, 0x7F, 0x39, 0x39, 0x80, 0x39, 0x39, 0x81, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x82, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ], [ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x81, 0x02, 0x02, 0x02, 0x82, 0x83, 0x84, 0x02, 0x85, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x86, 0x87, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x83, 0x02, 0x02, 0x02, 0x84, 0x85, 0x86, 0x02, 0x87, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x88, 0x89, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ], [ - 0x88, 0x89, 0x76, 0x02, 0x02, 0x8A, 0x02, 0x02, 0x02, 0x8B, 0x02, 0x8C, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x8D, 0x8E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x8F, 0x90, 0x02, 0x91, 0x92, 0x02, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, - 0x9A, 0x02, 0x9B, 0x02, 0x02, 0x9C, 0x9D, 0x9E, 0x9F, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x8A, 0x8B, 0x77, 0x02, 0x02, 0x8C, 0x02, 0x02, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02, + 0x02, 0x02, 0x90, 0x91, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x92, 0x93, 0x02, 0x94, 0x95, 0x02, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, + 0x9D, 0x02, 0x9E, 0x02, 0x02, 0x9F, 0xA0, 0xA1, 0xA2, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ], [ @@ -1099,7 +1100,7 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, - 0x39, 0x39, 0x39, 0xA0, + 0x39, 0x39, 0x39, 0xA3, ], [ 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, 0x1D, @@ -1110,7 +1111,7 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ ], #[cfg(feature = "cjk")] [ - 0x00, 0xA1, 0x02, 0x02, 0x02, 0x02, 0xA2, 0xA3, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08, + 0x00, 0xA4, 0x02, 0x02, 0x02, 0x02, 0xA5, 0xA6, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x02, 0x02, 0x1E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x02, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x02, 0x2A, @@ -1118,9 +1119,9 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ ], #[cfg(feature = "cjk")] [ - 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0x2E, 0xAB, 0x39, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, - 0xB1, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0xB2, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38, - 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0xB3, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0x2E, 0xAE, 0x39, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, + 0xB4, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0xB5, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38, + 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0xB6, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, ], @@ -1130,23 +1131,23 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([ 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x4B, 0x02, 0x02, 0x02, 0x02, 0x02, - 0xB4, 0x4D, 0x4E, 0xB5, + 0xB7, 0x4D, 0x4E, 0xB8, ], #[cfg(feature = "cjk")] [ - 0x88, 0x89, 0x76, 0x02, 0x02, 0x8A, 0x02, 0x02, 0x02, 0x8B, 0x02, 0x8C, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x8D, 0x8E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x8F, 0x90, 0xB6, 0xB7, 0x92, 0x02, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, - 0x9A, 0x02, 0x9B, 0x02, 0x02, 0x9C, 0x9D, 0x9E, 0x9F, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x8A, 0x8B, 0x77, 0x02, 0x02, 0x8C, 0x02, 0x02, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02, + 0x02, 0x02, 0x90, 0x91, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x92, 0x93, 0xB9, 0xBA, 0x95, 0x02, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, + 0x9D, 0x02, 0x9E, 0x02, 0x02, 0x9F, 0xA0, 0xA1, 0xA2, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, ], ]); #[cfg(feature = "cjk")] -const WIDTH_LEAVES_LEN: usize = 184; +const WIDTH_LEAVES_LEN: usize = 187; #[cfg(not(feature = "cjk"))] -const WIDTH_LEAVES_LEN: usize = 161; -/// Autogenerated. 184 sub-table(s). Consult [`lookup_width`] for layout info. +const WIDTH_LEAVES_LEN: usize = 164; +/// Autogenerated. 187 sub-table(s). Consult [`lookup_width`] for layout info. static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ [ 0x55, 0x55, 0x75, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, @@ -1230,7 +1231,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ ], [ 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x04, 0x01, 0x54, 0x55, 0x51, 0x55, 0x01, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x04, 0x01, 0x54, 0x55, 0x51, 0x15, 0x00, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, ], [ @@ -1335,7 +1336,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ ], [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, ], [ @@ -1586,7 +1587,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x55, 0x00, + 0x05, 0x00, ], [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, @@ -1693,6 +1694,11 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, ], + [ + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x04, 0x44, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, + ], [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x40, 0x00, 0x10, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, @@ -1749,18 +1755,18 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ 0x55, 0x55, ], [ + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x54, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, ], [ - 0x55, 0x55, 0x55, 0x15, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xAA, 0x54, 0x55, 0x55, 0x50, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, ], [ - 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, - 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0x55, 0x55, 0x55, 0x15, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xAA, 0x54, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, ], [ @@ -1769,10 +1775,15 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ 0x55, 0x95, ], [ - 0xAA, 0xAA, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, ], + [ + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0x55, + 0x55, 0x55, + ], [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xAA, 0xA9, @@ -1853,6 +1864,11 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, ], + [ + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x45, 0x55, 0x05, 0x55, 0x51, + 0x55, 0x55, + ], [ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, @@ -1920,7 +1936,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ ], [ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x5F, 0x55, 0x57, 0x7F, 0xFD, 0x55, 0xFF, 0x55, 0x55, 0xD5, 0x57, 0x55, 0xFF, + 0xFF, 0xFF, 0x5F, 0x55, 0x57, 0x7F, 0xFD, 0x57, 0xFF, 0x55, 0x55, 0xD5, 0x57, 0x55, 0xFF, 0xFF, 0x57, ], [ @@ -1944,8 +1960,8 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ 0xFF, 0x57, ], [ - 0xFF, 0xFF, 0x5F, 0xD5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x7F, 0x55, 0xF5, 0xFF, 0xFF, 0xFF, 0xD7, 0xFF, 0xFF, 0x5F, 0x55, 0xFF, 0xFF, + 0xFF, 0xFF, 0x7F, 0xF5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x7F, 0x57, 0xFD, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, 0xFF, 0x7F, 0xD5, 0xFF, 0xFF, 0x57, 0x55, ], [ @@ -2069,7 +2085,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ ], #[cfg(feature = "cjk")] [ - 0x03, 0x00, 0x00, 0xC0, 0xAA, 0xAA, 0x5A, 0x55, 0x00, 0x00, 0x00, 0x00, 0xAA, 0xAA, 0xAA, + 0x33, 0x00, 0x00, 0xC0, 0xAA, 0xAA, 0x5A, 0x55, 0x00, 0x00, 0x00, 0x00, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0x55, 0x55, 0x55, 0x55, 0x55, ], @@ -2176,7 +2192,7 @@ static NON_TRANSPARENT_ZERO_WIDTHS: [([u8; 3], [u8; 3]); 72] = [ /// (mostly ccc > 1). /// FIXME: can we get better compression? #[cfg(feature = "cjk")] -static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 202] = [ +static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 209] = [ ([0x00, 0x03, 0x00], [0x33, 0x03, 0x00]), ([0x39, 0x03, 0x00], [0x6F, 0x03, 0x00]), ([0x83, 0x04, 0x00], [0x87, 0x04, 0x00]), @@ -2253,7 +2269,8 @@ static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 202] = [ ([0x75, 0x1A, 0x00], [0x7C, 0x1A, 0x00]), ([0x7F, 0x1A, 0x00], [0x7F, 0x1A, 0x00]), ([0xB0, 0x1A, 0x00], [0xBD, 0x1A, 0x00]), - ([0xBF, 0x1A, 0x00], [0xCE, 0x1A, 0x00]), + ([0xBF, 0x1A, 0x00], [0xDD, 0x1A, 0x00]), + ([0xE0, 0x1A, 0x00], [0xEB, 0x1A, 0x00]), ([0x34, 0x1B, 0x00], [0x34, 0x1B, 0x00]), ([0x44, 0x1B, 0x00], [0x44, 0x1B, 0x00]), ([0x6B, 0x1B, 0x00], [0x73, 0x1B, 0x00]), @@ -2316,6 +2333,7 @@ static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 202] = [ ([0x69, 0x0D, 0x01], [0x6D, 0x0D, 0x01]), ([0x74, 0x0E, 0x01], [0x74, 0x0E, 0x01]), ([0xAB, 0x0E, 0x01], [0xAC, 0x0E, 0x01]), + ([0xFA, 0x0E, 0x01], [0xFB, 0x0E, 0x01]), ([0xFD, 0x0E, 0x01], [0xFF, 0x0E, 0x01]), ([0x46, 0x0F, 0x01], [0x50, 0x0F, 0x01]), ([0x82, 0x0F, 0x01], [0x85, 0x0F, 0x01]), @@ -2357,6 +2375,7 @@ static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 202] = [ ([0x41, 0x1F, 0x01], [0x42, 0x1F, 0x01]), ([0x2F, 0x61, 0x01], [0x2F, 0x61, 0x01]), ([0x30, 0x6B, 0x01], [0x36, 0x6B, 0x01]), + ([0x9D, 0x6D, 0x01], [0x9D, 0x6D, 0x01]), ([0xF0, 0x6F, 0x01], [0xF1, 0x6F, 0x01]), ([0x65, 0xD1, 0x01], [0x66, 0xD1, 0x01]), ([0x6D, 0xD1, 0x01], [0x72, 0xD1, 0x01]), @@ -2375,6 +2394,10 @@ static SOLIDUS_TRANSPARENT: [([u8; 3], [u8; 3]); 202] = [ ([0xEC, 0xE2, 0x01], [0xEF, 0xE2, 0x01]), ([0xEC, 0xE4, 0x01], [0xEF, 0xE4, 0x01]), ([0xEE, 0xE5, 0x01], [0xEF, 0xE5, 0x01]), + ([0xE3, 0xE6, 0x01], [0xE3, 0xE6, 0x01]), + ([0xE6, 0xE6, 0x01], [0xE6, 0xE6, 0x01]), + ([0xEE, 0xE6, 0x01], [0xEF, 0xE6, 0x01]), + ([0xF5, 0xE6, 0x01], [0xF5, 0xE6, 0x01]), ([0xD0, 0xE8, 0x01], [0xD6, 0xE8, 0x01]), ([0x44, 0xE9, 0x01], [0x4A, 0xE9, 0x01]), ([0x85, 0xEC, 0x01], [0x85, 0xEC, 0x01]), @@ -2749,7 +2772,7 @@ mod tests { WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION, WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, WidthInfo::KIRAT_RAI_VOWEL_SIGN_AI, - WidthInfo::VARIATION_SELECTOR_1_OR_2, + WidthInfo::VARIATION_SELECTOR_1_2_OR_3, WidthInfo::VARIATION_SELECTOR_15, WidthInfo::VARIATION_SELECTOR_16, WidthInfo::JOINING_GROUP_ALEF, @@ -2794,7 +2817,7 @@ mod tests { WidthInfo::TAG_A6_END_ZWJ_EMOJI_PRESENTATION, WidthInfo::KIRAT_RAI_VOWEL_SIGN_E, WidthInfo::KIRAT_RAI_VOWEL_SIGN_AI, - WidthInfo::VARIATION_SELECTOR_1_OR_2, + WidthInfo::VARIATION_SELECTOR_1_2_OR_3, WidthInfo::VARIATION_SELECTOR_16, WidthInfo::JOINING_GROUP_ALEF, WidthInfo::COMBINING_LONG_SOLIDUS_OVERLAY, @@ -2814,7 +2837,7 @@ mod tests { ]; #[rustfmt::skip] - static NORMALIZATION_TEST: [(&str, &str, &str, &str, &str); 19965] = [ + static NORMALIZATION_TEST: [(&str, &str, &str, &str, &str); 20036] = [ (r#"Ḋ"#, r#"Ḋ"#, r#"Ḋ"#, r#"Ḋ"#, r#"Ḋ"#), (r#"Ḍ"#, r#"Ḍ"#, r#"Ḍ"#, r#"Ḍ"#, r#"Ḍ"#), (r#"Ḍ̇"#, r#"Ḍ̇"#, r#"Ḍ̇"#, r#"Ḍ̇"#, r#"Ḍ̇"#), @@ -5199,6 +5222,7 @@ mod tests { (r#"ꚜ"#, r#"ꚜ"#, r#"ꚜ"#, r#"ъ"#, r#"ъ"#), (r#"ꚝ"#, r#"ꚝ"#, r#"ꚝ"#, r#"ь"#, r#"ь"#), (r#"ꝰ"#, r#"ꝰ"#, r#"ꝰ"#, r#"ꝯ"#, r#"ꝯ"#), + (r#"꟱"#, r#"꟱"#, r#"꟱"#, r#"S"#, r#"S"#), (r#"ꟲ"#, r#"ꟲ"#, r#"ꟲ"#, r#"C"#, r#"C"#), (r#"ꟳ"#, r#"ꟳ"#, r#"ꟳ"#, r#"F"#, r#"F"#), (r#"ꟴ"#, r#"ꟴ"#, r#"ꟴ"#, r#"Q"#, r#"Q"#), @@ -20865,6 +20889,60 @@ mod tests { (r#"a֮ᫍ̀̕b"#, r#"a֮ᫍ̀̕b"#, r#"a֮ᫍ̀̕b"#, r#"a֮ᫍ̀̕b"#, r#"a֮ᫍ̀̕b"#), (r#"à֮ᫎ̕b"#, r#"à֮ᫎ̕b"#, r#"à֮ᫎ̕b"#, r#"à֮ᫎ̕b"#, r#"à֮ᫎ̕b"#), (r#"a֮ᫎ̀̕b"#, r#"a֮ᫎ̀̕b"#, r#"a֮ᫎ̀̕b"#, r#"a֮ᫎ̀̕b"#, r#"a֮ᫎ̀̕b"#), + (r#"à֮̕᫏b"#, r#"à֮᫏̕b"#, r#"à֮᫏̕b"#, r#"à֮᫏̕b"#, r#"à֮᫏̕b"#), + (r#"a᫏֮̀̕b"#, r#"a֮᫏̀̕b"#, r#"a֮᫏̀̕b"#, r#"a֮᫏̀̕b"#, r#"a֮᫏̀̕b"#), + (r#"à֮̕᫐b"#, r#"à֮᫐̕b"#, r#"à֮᫐̕b"#, r#"à֮᫐̕b"#, r#"à֮᫐̕b"#), + (r#"a᫐֮̀̕b"#, r#"a֮᫐̀̕b"#, r#"a֮᫐̀̕b"#, r#"a֮᫐̀̕b"#, r#"a֮᫐̀̕b"#), + (r#"à֮̕᫑b"#, r#"à֮᫑̕b"#, r#"à֮᫑̕b"#, r#"à֮᫑̕b"#, r#"à֮᫑̕b"#), + (r#"a᫑֮̀̕b"#, r#"a֮᫑̀̕b"#, r#"a֮᫑̀̕b"#, r#"a֮᫑̀̕b"#, r#"a֮᫑̀̕b"#), + (r#"à֮̕᫒b"#, r#"à֮᫒̕b"#, r#"à֮᫒̕b"#, r#"à֮᫒̕b"#, r#"à֮᫒̕b"#), + (r#"a᫒֮̀̕b"#, r#"a֮᫒̀̕b"#, r#"a֮᫒̀̕b"#, r#"a֮᫒̀̕b"#, r#"a֮᫒̀̕b"#), + (r#"à֮̕᫓b"#, r#"à֮᫓̕b"#, r#"à֮᫓̕b"#, r#"à֮᫓̕b"#, r#"à֮᫓̕b"#), + (r#"a᫓֮̀̕b"#, r#"a֮᫓̀̕b"#, r#"a֮᫓̀̕b"#, r#"a֮᫓̀̕b"#, r#"a֮᫓̀̕b"#), + (r#"à֮̕᫔b"#, r#"à֮᫔̕b"#, r#"à֮᫔̕b"#, r#"à֮᫔̕b"#, r#"à֮᫔̕b"#), + (r#"a᫔֮̀̕b"#, r#"a֮᫔̀̕b"#, r#"a֮᫔̀̕b"#, r#"a֮᫔̀̕b"#, r#"a֮᫔̀̕b"#), + (r#"à֮̕᫕b"#, r#"à֮᫕̕b"#, r#"à֮᫕̕b"#, r#"à֮᫕̕b"#, r#"à֮᫕̕b"#), + (r#"a᫕֮̀̕b"#, r#"a֮᫕̀̕b"#, r#"a֮᫕̀̕b"#, r#"a֮᫕̀̕b"#, r#"a֮᫕̀̕b"#), + (r#"à֮̕᫖b"#, r#"à֮᫖̕b"#, r#"à֮᫖̕b"#, r#"à֮᫖̕b"#, r#"à֮᫖̕b"#), + (r#"a᫖֮̀̕b"#, r#"a֮᫖̀̕b"#, r#"a֮᫖̀̕b"#, r#"a֮᫖̀̕b"#, r#"a֮᫖̀̕b"#), + (r#"à֮̕᫗b"#, r#"à֮᫗̕b"#, r#"à֮᫗̕b"#, r#"à֮᫗̕b"#, r#"à֮᫗̕b"#), + (r#"a᫗֮̀̕b"#, r#"a֮᫗̀̕b"#, r#"a֮᫗̀̕b"#, r#"a֮᫗̀̕b"#, r#"a֮᫗̀̕b"#), + (r#"à֮̕᫘b"#, r#"à֮᫘̕b"#, r#"à֮᫘̕b"#, r#"à֮᫘̕b"#, r#"à֮᫘̕b"#), + (r#"a᫘֮̀̕b"#, r#"a֮᫘̀̕b"#, r#"a֮᫘̀̕b"#, r#"a֮᫘̀̕b"#, r#"a֮᫘̀̕b"#), + (r#"à֮̕᫙b"#, r#"à֮᫙̕b"#, r#"à֮᫙̕b"#, r#"à֮᫙̕b"#, r#"à֮᫙̕b"#), + (r#"a᫙֮̀̕b"#, r#"a֮᫙̀̕b"#, r#"a֮᫙̀̕b"#, r#"a֮᫙̀̕b"#, r#"a֮᫙̀̕b"#), + (r#"à֮̕᫚b"#, r#"à֮᫚̕b"#, r#"à֮᫚̕b"#, r#"à֮᫚̕b"#, r#"à֮᫚̕b"#), + (r#"a᫚֮̀̕b"#, r#"a֮᫚̀̕b"#, r#"a֮᫚̀̕b"#, r#"a֮᫚̀̕b"#, r#"a֮᫚̀̕b"#), + (r#"à֮̕᫛b"#, r#"à֮᫛̕b"#, r#"à֮᫛̕b"#, r#"à֮᫛̕b"#, r#"à֮᫛̕b"#), + (r#"a᫛֮̀̕b"#, r#"a֮᫛̀̕b"#, r#"a֮᫛̀̕b"#, r#"a֮᫛̀̕b"#, r#"a֮᫛̀̕b"#), + (r#"à֮̕᫜b"#, r#"à֮᫜̕b"#, r#"à֮᫜̕b"#, r#"à֮᫜̕b"#, r#"à֮᫜̕b"#), + (r#"a᫜֮̀̕b"#, r#"a֮᫜̀̕b"#, r#"a֮᫜̀̕b"#, r#"a֮᫜̀̕b"#, r#"a֮᫜̀̕b"#), + (r#"a᷺̖֚᫝b"#, r#"a᷺̖᫝֚b"#, r#"a᷺̖᫝֚b"#, r#"a᷺̖᫝֚b"#, r#"a᷺̖᫝֚b"#), + (r#"a᫝᷺̖֚b"#, r#"a᷺᫝̖֚b"#, r#"a᷺᫝̖֚b"#, r#"a᷺᫝̖֚b"#, r#"a᷺᫝̖֚b"#), + (r#"à֮̕᫠b"#, r#"à֮᫠̕b"#, r#"à֮᫠̕b"#, r#"à֮᫠̕b"#, r#"à֮᫠̕b"#), + (r#"a᫠֮̀̕b"#, r#"a֮᫠̀̕b"#, r#"a֮᫠̀̕b"#, r#"a֮᫠̀̕b"#, r#"a֮᫠̀̕b"#), + (r#"à֮̕᫡b"#, r#"à֮᫡̕b"#, r#"à֮᫡̕b"#, r#"à֮᫡̕b"#, r#"à֮᫡̕b"#), + (r#"a᫡֮̀̕b"#, r#"a֮᫡̀̕b"#, r#"a֮᫡̀̕b"#, r#"a֮᫡̀̕b"#, r#"a֮᫡̀̕b"#), + (r#"à֮̕᫢b"#, r#"à֮᫢̕b"#, r#"à֮᫢̕b"#, r#"à֮᫢̕b"#, r#"à֮᫢̕b"#), + (r#"a᫢֮̀̕b"#, r#"a֮᫢̀̕b"#, r#"a֮᫢̀̕b"#, r#"a֮᫢̀̕b"#, r#"a֮᫢̀̕b"#), + (r#"à֮̕᫣b"#, r#"à֮᫣̕b"#, r#"à֮᫣̕b"#, r#"à֮᫣̕b"#, r#"à֮᫣̕b"#), + (r#"a᫣֮̀̕b"#, r#"a֮᫣̀̕b"#, r#"a֮᫣̀̕b"#, r#"a֮᫣̀̕b"#, r#"a֮᫣̀̕b"#), + (r#"à֮̕᫤b"#, r#"à֮᫤̕b"#, r#"à֮᫤̕b"#, r#"à֮᫤̕b"#, r#"à֮᫤̕b"#), + (r#"a᫤֮̀̕b"#, r#"a֮᫤̀̕b"#, r#"a֮᫤̀̕b"#, r#"a֮᫤̀̕b"#, r#"a֮᫤̀̕b"#), + (r#"à֮̕᫥b"#, r#"à֮᫥̕b"#, r#"à֮᫥̕b"#, r#"à֮᫥̕b"#, r#"à֮᫥̕b"#), + (r#"a᫥֮̀̕b"#, r#"a֮᫥̀̕b"#, r#"a֮᫥̀̕b"#, r#"a֮᫥̀̕b"#, r#"a֮᫥̀̕b"#), + (r#"a᷺̖֚᫦b"#, r#"a᷺̖᫦֚b"#, r#"a᷺̖᫦֚b"#, r#"a᷺̖᫦֚b"#, r#"a᷺̖᫦֚b"#), + (r#"a᫦᷺̖֚b"#, r#"a᷺᫦̖֚b"#, r#"a᷺᫦̖֚b"#, r#"a᷺᫦̖֚b"#, r#"a᷺᫦̖֚b"#), + (r#"à֮̕᫧b"#, r#"à֮᫧̕b"#, r#"à֮᫧̕b"#, r#"à֮᫧̕b"#, r#"à֮᫧̕b"#), + (r#"a᫧֮̀̕b"#, r#"a֮᫧̀̕b"#, r#"a֮᫧̀̕b"#, r#"a֮᫧̀̕b"#, r#"a֮᫧̀̕b"#), + (r#"à֮̕᫨b"#, r#"à֮᫨̕b"#, r#"à֮᫨̕b"#, r#"à֮᫨̕b"#, r#"à֮᫨̕b"#), + (r#"a᫨֮̀̕b"#, r#"a֮᫨̀̕b"#, r#"a֮᫨̀̕b"#, r#"a֮᫨̀̕b"#, r#"a֮᫨̀̕b"#), + (r#"à֮̕᫩b"#, r#"à֮᫩̕b"#, r#"à֮᫩̕b"#, r#"à֮᫩̕b"#, r#"à֮᫩̕b"#), + (r#"a᫩֮̀̕b"#, r#"a֮᫩̀̕b"#, r#"a֮᫩̀̕b"#, r#"a֮᫩̀̕b"#, r#"a֮᫩̀̕b"#), + (r#"à֮̕᫪b"#, r#"à֮᫪̕b"#, r#"à֮᫪̕b"#, r#"à֮᫪̕b"#, r#"à֮᫪̕b"#), + (r#"a᫪֮̀̕b"#, r#"a֮᫪̀̕b"#, r#"a֮᫪̀̕b"#, r#"a֮᫪̀̕b"#, r#"a֮᫪̀̕b"#), + (r#"a͜͝ͅ᫫b"#, r#"a͜͝᫫ͅb"#, r#"a͜͝᫫ͅb"#, r#"a͜͝᫫ͅb"#, r#"a͜͝᫫ͅb"#), + (r#"a᫫͜͝ͅb"#, r#"a͜᫫͝ͅb"#, r#"a͜᫫͝ͅb"#, r#"a͜᫫͝ͅb"#, r#"a͜᫫͝ͅb"#), (r#"a𖿰़᬴゙b"#, r#"a𖿰़᬴゙b"#, r#"a𖿰़᬴゙b"#, r#"a𖿰़᬴゙b"#, r#"a𖿰़᬴゙b"#), (r#"a𖿰᬴़゙b"#, r#"a𖿰᬴़゙b"#, r#"a𖿰᬴़゙b"#, r#"a𖿰᬴़゙b"#, r#"a𖿰᬴़゙b"#), (r#"a゙्᭄ְb"#, r#"a゙्᭄ְb"#, r#"a゙्᭄ְb"#, r#"a゙्᭄ְb"#, r#"a゙्᭄ְb"#), @@ -21413,6 +21491,10 @@ mod tests { (r#"a֮𐺫̀̕b"#, r#"a֮𐺫̀̕b"#, r#"a֮𐺫̀̕b"#, r#"a֮𐺫̀̕b"#, r#"a֮𐺫̀̕b"#), (r#"à֮𐺬̕b"#, r#"à֮𐺬̕b"#, r#"à֮𐺬̕b"#, r#"à֮𐺬̕b"#, r#"à֮𐺬̕b"#), (r#"a֮𐺬̀̕b"#, r#"a֮𐺬̀̕b"#, r#"a֮𐺬̀̕b"#, r#"a֮𐺬̀̕b"#, r#"a֮𐺬̀̕b"#), + (r#"a᷺̖֚𐻺b"#, r#"a᷺̖𐻺֚b"#, r#"a᷺̖𐻺֚b"#, r#"a᷺̖𐻺֚b"#, r#"a᷺̖𐻺֚b"#), + (r#"a𐻺᷺̖֚b"#, r#"a᷺𐻺̖֚b"#, r#"a᷺𐻺̖֚b"#, r#"a᷺𐻺̖֚b"#, r#"a᷺𐻺̖֚b"#), + (r#"a᷺̖֚𐻻b"#, r#"a᷺̖𐻻֚b"#, r#"a᷺̖𐻻֚b"#, r#"a᷺̖𐻻֚b"#, r#"a᷺̖𐻻֚b"#), + (r#"a𐻻᷺̖֚b"#, r#"a᷺𐻻̖֚b"#, r#"a᷺𐻻̖֚b"#, r#"a᷺𐻻̖֚b"#, r#"a᷺𐻻̖֚b"#), (r#"a᷺̖𐻽֚b"#, r#"a᷺̖𐻽֚b"#, r#"a᷺̖𐻽֚b"#, r#"a᷺̖𐻽֚b"#, r#"a᷺̖𐻽֚b"#), (r#"a᷺𐻽̖֚b"#, r#"a᷺𐻽̖֚b"#, r#"a᷺𐻽̖֚b"#, r#"a᷺𐻽̖֚b"#, r#"a᷺𐻽̖֚b"#), (r#"a᷺̖𐻾֚b"#, r#"a᷺̖𐻾֚b"#, r#"a᷺̖𐻾֚b"#, r#"a᷺̖𐻾֚b"#, r#"a᷺̖𐻾֚b"#), @@ -21599,6 +21681,8 @@ mod tests { (r#"a֮𖬵̀̕b"#, r#"a֮𖬵̀̕b"#, r#"a֮𖬵̀̕b"#, r#"a֮𖬵̀̕b"#, r#"a֮𖬵̀̕b"#), (r#"à֮𖬶̕b"#, r#"à֮𖬶̕b"#, r#"à֮𖬶̕b"#, r#"à֮𖬶̕b"#, r#"à֮𖬶̕b"#), (r#"a֮𖬶̀̕b"#, r#"a֮𖬶̀̕b"#, r#"a֮𖬶̀̕b"#, r#"a֮𖬶̀̕b"#, r#"a֮𖬶̀̕b"#), + (r#"a゙्ְ𖶝b"#, r#"a゙्𖶝ְb"#, r#"a゙्𖶝ְb"#, r#"a゙्𖶝ְb"#, r#"a゙्𖶝ְb"#), + (r#"a𖶝゙्ְb"#, r#"a゙𖶝्ְb"#, r#"a゙𖶝्ְb"#, r#"a゙𖶝्ְb"#, r#"a゙𖶝्ְb"#), (r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#), (r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#, r#"a̴𖿰𖿰़b"#), (r#"a̴𖿰𖿱़b"#, r#"a̴𖿰𖿱़b"#, r#"a̴𖿰𖿱़b"#, r#"a̴𖿰𖿱़b"#, r#"a̴𖿰𖿱़b"#), @@ -21785,6 +21869,16 @@ mod tests { (r#"a֮𞗮̀̕b"#, r#"a֮𞗮̀̕b"#, r#"a֮𞗮̀̕b"#, r#"a֮𞗮̀̕b"#, r#"a֮𞗮̀̕b"#), (r#"a᷺̖𞗯֚b"#, r#"a᷺̖𞗯֚b"#, r#"a᷺̖𞗯֚b"#, r#"a᷺̖𞗯֚b"#, r#"a᷺̖𞗯֚b"#), (r#"a᷺𞗯̖֚b"#, r#"a᷺𞗯̖֚b"#, r#"a᷺𞗯̖֚b"#, r#"a᷺𞗯̖֚b"#, r#"a᷺𞗯̖֚b"#), + (r#"à֮̕𞛣b"#, r#"à֮𞛣̕b"#, r#"à֮𞛣̕b"#, r#"à֮𞛣̕b"#, r#"à֮𞛣̕b"#), + (r#"a𞛣֮̀̕b"#, r#"a֮𞛣̀̕b"#, r#"a֮𞛣̀̕b"#, r#"a֮𞛣̀̕b"#, r#"a֮𞛣̀̕b"#), + (r#"à֮̕𞛦b"#, r#"à֮𞛦̕b"#, r#"à֮𞛦̕b"#, r#"à֮𞛦̕b"#, r#"à֮𞛦̕b"#), + (r#"a𞛦֮̀̕b"#, r#"a֮𞛦̀̕b"#, r#"a֮𞛦̀̕b"#, r#"a֮𞛦̀̕b"#, r#"a֮𞛦̀̕b"#), + (r#"à֮̕𞛮b"#, r#"à֮𞛮̕b"#, r#"à֮𞛮̕b"#, r#"à֮𞛮̕b"#, r#"à֮𞛮̕b"#), + (r#"a𞛮֮̀̕b"#, r#"a֮𞛮̀̕b"#, r#"a֮𞛮̀̕b"#, r#"a֮𞛮̀̕b"#, r#"a֮𞛮̀̕b"#), + (r#"à֮̕𞛯b"#, r#"à֮𞛯̕b"#, r#"à֮𞛯̕b"#, r#"à֮𞛯̕b"#, r#"à֮𞛯̕b"#), + (r#"a𞛯֮̀̕b"#, r#"a֮𞛯̀̕b"#, r#"a֮𞛯̀̕b"#, r#"a֮𞛯̀̕b"#, r#"a֮𞛯̀̕b"#), + (r#"à֮̕𞛵b"#, r#"à֮𞛵̕b"#, r#"à֮𞛵̕b"#, r#"à֮𞛵̕b"#, r#"à֮𞛵̕b"#), + (r#"a𞛵֮̀̕b"#, r#"a֮𞛵̀̕b"#, r#"a֮𞛵̀̕b"#, r#"a֮𞛵̀̕b"#, r#"a֮𞛵̀̕b"#), (r#"a᷺̖𞣐֚b"#, r#"a᷺̖𞣐֚b"#, r#"a᷺̖𞣐֚b"#, r#"a᷺̖𞣐֚b"#, r#"a᷺̖𞣐֚b"#), (r#"a᷺𞣐̖֚b"#, r#"a᷺𞣐̖֚b"#, r#"a᷺𞣐̖֚b"#, r#"a᷺𞣐̖֚b"#, r#"a᷺𞣐̖֚b"#), (r#"a᷺̖𞣑֚b"#, r#"a᷺̖𞣑֚b"#, r#"a᷺̖𞣑֚b"#, r#"a᷺̖𞣑֚b"#, r#"a᷺̖𞣑֚b"#), diff --git a/tests/tests.rs b/tests/tests.rs index 16b96b0..6299f19 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -598,9 +598,10 @@ fn ambiguous_line_break() { } #[test] -fn test_vs1_vs2() { +fn test_vs1_vs2_vs3() { assert_width!('\u{FE00}', Some(0), Some(0)); assert_width!('\u{FE01}', Some(0), Some(0)); + assert_width!('\u{FE02}', Some(0), Some(0)); for c in '\0'..=char::MAX { if matches!(c, '\u{2018}' | '\u{2019}' | '\u{201C}' | '\u{201D}') { assert_width!(c, Some(1), Some(2)); @@ -608,6 +609,8 @@ fn test_vs1_vs2() { assert_width!(format!("{c}\u{FE00}\u{FE01}"), 1, 1); assert_width!(format!("{c}\u{FE01}"), 2, 2); assert_width!(format!("{c}\u{FE01}\u{FE00}"), 2, 2); + assert_width!(format!("{c}\u{FE02}"), 1, 1); + assert_width!(format!("{c}\u{FE02}\u{FE01}"), 1, 1); } else { assert_eq!( format!("{c}\u{FE00}").width(), @@ -631,6 +634,17 @@ fn test_vs1_vs2() { c.width_cjk().unwrap_or(1), "{c:?}" ); + assert_eq!( + format!("{c}\u{FE02}").width(), + c.width().unwrap_or(1), + "{c:?}" + ); + #[cfg(feature = "cjk")] + assert_eq!( + format!("{c}\u{FE02}").width_cjk(), + c.width_cjk().unwrap_or(1), + "{c:?}" + ); } } }