@@ -2,6 +2,7 @@ use std::borrow::Cow;
2
2
use std:: env;
3
3
use std:: fmt;
4
4
use std:: fmt:: { Debug , Formatter } ;
5
+ use std:: ops:: Range ;
5
6
use std:: sync:: atomic:: { AtomicBool , Ordering } ;
6
7
7
8
use once_cell:: sync:: Lazy ;
@@ -807,80 +808,123 @@ pub(crate) fn char_width(_c: char) -> usize {
807
808
1
808
809
}
809
810
810
- /// Truncates a string to a certain number of characters.
811
+ /// Slices a `&str` by displayed text width: only the text columns in the
812
+ /// half-open range `bounds.start..bounds.end` are kept.
811
813
///
812
- /// This ensures that escape codes are not screwed up in the process.
813
- /// If the maximum length is hit the string will be truncated but
814
- /// escapes code will still be honored. If truncation takes place
815
- /// the tail string will be appended.
816
- pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
814
+ /// If a multi-column character overlaps the end of the interval it is not
815
+ /// included, so the result may be narrower than `bounds.end - bounds.start`
816
+ /// columns.
817
+ ///
818
+ /// With the `ansi-parsing` feature, escape codes are never cut: codes found before the
819
+ /// slice come first, then `head`, the slice and `tail`, then any codes found after it.
820
+ /// Without that feature, `bounds` is applied as a byte range; an invalid range yields an empty slice.
821
+ pub fn slice_str < ' a > ( s : & ' a str , head : & str , bounds : Range < usize > , tail : & str ) -> Cow < ' a , str > {
817
822
#[ cfg( feature = "ansi-parsing" ) ]
818
823
{
819
- use std:: cmp:: Ordering ;
820
- let mut iter = AnsiCodeIterator :: new ( s) ;
821
- let mut length = 0 ;
822
- let mut rv = None ;
823
-
824
- while let Some ( item) = iter. next ( ) {
825
- match item {
826
- ( s, false ) => {
827
- if rv. is_none ( ) {
828
- if str_width ( s) + length > width - str_width ( tail) {
829
- let ts = iter. current_slice ( ) ;
830
-
831
- let mut s_byte = 0 ;
832
- let mut s_width = 0 ;
833
- let rest_width = width - str_width ( tail) - length;
834
- for c in s. chars ( ) {
835
- s_byte += c. len_utf8 ( ) ;
836
- s_width += char_width ( c) ;
837
- match s_width. cmp ( & rest_width) {
838
- Ordering :: Equal => break ,
839
- Ordering :: Greater => {
840
- s_byte -= c. len_utf8 ( ) ;
841
- break ;
842
- }
843
- Ordering :: Less => continue ,
844
- }
845
- }
846
-
847
- let idx = ts. len ( ) - s. len ( ) + s_byte;
848
- let mut buf = ts[ ..idx] . to_string ( ) ;
849
- buf. push_str ( tail) ;
850
- rv = Some ( buf) ;
851
- }
852
- length += str_width ( s) ;
824
+ let mut pos = 0 ; // Text columns consumed so far
825
+ let mut code_iter = AnsiCodeIterator :: new ( s) . peekable ( ) ;
826
+
827
+ // Search for the beginning of the slice while collecting leading ANSI
828
+ // codes
829
+ let mut front_ansi = String :: new ( ) ; // ANSI codes found before `bounds.start`
830
+ let mut slice_start = 0 ; // Byte offset in `s` where the slice begins
831
+
832
+ // Skip everything before `bounds.start`. The peeked item *may be mutated* to drop its already-skipped prefix.
833
+ ' search_slice_start: while pos < bounds. start {
834
+ let Some ( ( sub, is_ansi) ) = code_iter. peek_mut ( ) else {
835
+ break ;
836
+ } ;
837
+
838
+ if * is_ansi {
839
+ // Keep track of leading ANSI for later output.
840
+ front_ansi. push_str ( sub) ;
841
+ slice_start += sub. len ( ) ;
842
+ } else {
843
+ for ( c_idx, c) in sub. char_indices ( ) {
844
+ if pos >= bounds. start {
845
+ // Keep the rest of this segment in the iterator so that the search for the
846
+ // end bound below still sees it.
847
+ * sub = & sub[ c_idx..] ;
848
+ break ' search_slice_start;
853
849
}
850
+
851
+ pos += char_width ( c) ;
852
+ slice_start += c. len_utf8 ( ) ;
854
853
}
855
- ( s, true ) => {
856
- if let Some ( ref mut rv) = rv {
857
- rv. push_str ( s) ;
858
- }
854
+ }
855
+
856
+ code_iter. next ( ) ;
857
+ }
858
+
859
+ // Search for the end of the slice. This loop is a bit simpler because we don't need to
860
+ // keep track of remaining characters if we cut in the middle of a non-ANSI slice.
861
+ let mut slice_end = slice_start;
862
+
863
+ ' search_slice_end: for ( sub, is_ansi) in & mut code_iter {
864
+ if is_ansi {
865
+ // Keep ANSI in the output slice but don't account for them in the total width.
866
+ slice_end += sub. len ( ) ;
867
+ continue ;
868
+ }
869
+
870
+ for c in sub. chars ( ) {
871
+ let c_width = char_width ( c) ;
872
+
873
+ if pos + c_width > bounds. end {
874
+ // We will only search for ANSI codes after breaking this
875
+ // loop, so we can safely drop the remainder of `sub`
876
+ break ' search_slice_end;
859
877
}
878
+
879
+ pos += c_width;
880
+ slice_end += c. len_utf8 ( ) ;
860
881
}
861
882
}
862
883
863
- if let Some ( buf) = rv {
864
- Cow :: Owned ( buf)
865
- } else {
866
- Cow :: Borrowed ( s)
884
+ // Initialise the result (before appending remaining ANSI slices)
885
+ let slice = & s[ slice_start..slice_end] ;
886
+
887
+ let mut result = {
888
+ if front_ansi. is_empty ( ) && head. is_empty ( ) && tail. is_empty ( ) {
889
+ // Nothing to prepend or append, so the slice can be borrowed without allocating.
890
+ Cow :: Borrowed ( slice)
891
+ } else {
892
+ Cow :: Owned ( front_ansi + head + slice + tail)
893
+ }
894
+ } ;
895
+
896
+ // Push back remaining ANSI codes to result
897
+ for ( sub, is_ansi) in code_iter {
898
+ if is_ansi {
899
+ result. to_mut ( ) . push_str ( sub) ;
900
+ }
867
901
}
868
- }
869
902
903
+ result
904
+ }
870
905
#[ cfg( not( feature = "ansi-parsing" ) ) ]
871
906
{
872
- if s. len ( ) <= width - tail. len ( ) {
873
- Cow :: Borrowed ( s)
907
+ let slice = s. get ( bounds) . unwrap_or ( "" ) ;
908
+
909
+ if head. is_empty ( ) && tail. is_empty ( ) {
910
+ Cow :: Borrowed ( slice)
874
911
} else {
875
- Cow :: Owned ( format ! (
876
- "{}{}" ,
877
- s. get( ..width - tail. len( ) ) . unwrap_or_default( ) ,
878
- tail
879
- ) )
912
+ Cow :: Owned ( format ! ( "{head}{slice}{tail}" ) )
880
913
}
881
914
}
882
915
}
883
916
917
+ /// Truncates a string to `width` text columns, reserving room for `tail`.
918
+ ///
919
+ /// Delegates to `slice_str` with the column range `0..width - tail_width` (saturating at
920
+ /// zero), so escape codes are handled the same way as there, and `tail` follows the kept text.
921
+ /// NOTE(review): `tail` is passed on unconditionally, so it appears to be appended even when
922
+ /// `s` already fits in `width`; confirm this is intended.
923
+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
924
+ let tail_width = measure_text_width ( tail) ;
925
+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
926
+ }
927
+
884
928
/// Pads a string to fill a certain number of characters.
885
929
///
886
930
/// This will honor ansi codes correctly and allows you to align a string
@@ -1000,11 +1044,64 @@ fn test_truncate_str() {
1000
1044
) ;
1001
1045
}
1002
1046
1047
+ #[ test]
1048
+ fn test_slice_ansi_str ( ) {
1049
+ // Note that 🐶 is two columns wide, so with `unicode-width` and `ansi-parsing` enabled `test_str` measures 16 columns (asserted below)
1050
+ let test_str = "Hello\x1b [31m🐶\x1b [1m🐶\x1b [0m world!" ;
1051
+ assert_eq ! ( slice_str( test_str, "" , 0 ..test_str. len( ) , "" ) , test_str) ;
1052
+
1053
+ assert_eq ! (
1054
+ slice_str( test_str, ">>>" , 0 ..test_str. len( ) , "<<<" ) ,
1055
+ format!( ">>>{test_str}<<<" ) ,
1056
+ ) ;
1057
+
1058
+ if cfg ! ( feature = "unicode-width" ) && cfg ! ( feature = "ansi-parsing" ) {
1059
+ assert_eq ! ( measure_text_width( test_str) , 16 ) ;
1060
+
1061
+ assert_eq ! (
1062
+ slice_str( test_str, "" , 5 ..5 , "" ) ,
1063
+ "\u{1b} [31m\u{1b} [1m\u{1b} [0m"
1064
+ ) ;
1065
+
1066
+ assert_eq ! (
1067
+ slice_str( test_str, "" , 0 ..5 , "" ) ,
1068
+ "Hello\x1b [31m\x1b [1m\x1b [0m"
1069
+ ) ;
1070
+
1071
+ assert_eq ! (
1072
+ slice_str( test_str, "" , 0 ..6 , "" ) ,
1073
+ "Hello\x1b [31m\x1b [1m\x1b [0m"
1074
+ ) ;
1075
+
1076
+ assert_eq ! (
1077
+ slice_str( test_str, "" , 0 ..7 , "" ) ,
1078
+ "Hello\x1b [31m🐶\x1b [1m\x1b [0m"
1079
+ ) ;
1080
+
1081
+ assert_eq ! (
1082
+ slice_str( test_str, "" , 4 ..9 , "" ) ,
1083
+ "o\x1b [31m🐶\x1b [1m🐶\x1b [0m"
1084
+ ) ;
1085
+
1086
+ assert_eq ! (
1087
+ slice_str( test_str, "" , 7 ..21 , "" ) ,
1088
+ "\x1b [31m\x1b [1m🐶\x1b [0m world!"
1089
+ ) ;
1090
+
1091
+ assert_eq ! (
1092
+ slice_str( test_str, ">>>" , 7 ..21 , "<<<" ) ,
1093
+ "\x1b [31m>>>\x1b [1m🐶\x1b [0m world!<<<"
1094
+ ) ;
1095
+ }
1096
+ }
1097
+
1003
1098
#[ test]
1004
1099
fn test_truncate_str_no_ansi ( ) {
// Exercises `truncate_str` on strings without escape codes. The first case expects the tail
// to replace the last column even though the input is exactly `width` wide; the last case
// expects only the tail when it is wider than `width`.
1100
+ assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo ba!" ) ;
1005
1101
assert_eq ! ( & truncate_str( "foo bar" , 5 , "" ) , "foo b" ) ;
1006
1102
assert_eq ! ( & truncate_str( "foo bar" , 5 , "!" ) , "foo !" ) ;
1007
1103
assert_eq ! ( & truncate_str( "foo bar baz" , 10 , "..." ) , "foo bar..." ) ;
1104
+ assert_eq ! ( & truncate_str( "foo bar" , 2 , "!!!" ) , "!!!" ) ;
1008
1105
}
1009
1106
1010
1107
#[ test]
0 commit comments