Skip to content

Commit c0e5ff2

Browse files
committed
Add new function ansi::slice_ansi_str
1 parent e6882ab commit c0e5ff2

File tree

2 files changed

+155
-58
lines changed

2 files changed

+155
-58
lines changed

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ pub use crate::term::{
8484
};
8585
pub use crate::utils::{
8686
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
87-
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
88-
Color, Emoji, Style, StyledObject,
87+
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
88+
Attribute, Color, Emoji, Style, StyledObject,
8989
};
9090

9191
#[cfg(feature = "ansi-parsing")]

src/utils.rs

Lines changed: 153 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::env;
33
use std::fmt;
44
use std::fmt::{Debug, Formatter};
5+
use std::ops::Range;
56
use std::sync::atomic::{AtomicBool, Ordering};
67

78
use once_cell::sync::Lazy;
@@ -807,80 +808,123 @@ pub(crate) fn char_width(_c: char) -> usize {
807808
1
808809
}
809810

810-
/// Truncates a string to a certain number of characters.
811+
/// Slice a `&str` in terms of text width. This means that only the text
812+
/// columns strictly between `start` and `end` will be kept.
811813
///
812-
/// This ensures that escape codes are not screwed up in the process.
813-
/// If the maximum length is hit the string will be truncated but
814-
/// escapes code will still be honored. If truncation takes place
815-
/// the tail string will be appended.
816-
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
814+
/// If a multi-column character overlaps with the end of the interval it will
815+
/// not be included. In such a case, the result will be less than `end - start`
816+
/// columns wide.
817+
///
818+
/// This ensures that escape codes are not screwed up in the process. And if
819+
/// non-empty head and tail are specified, they are inserted between the ANSI
820+
/// codes from truncated bounds and the slice.
821+
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
817822
#[cfg(feature = "ansi-parsing")]
818823
{
819-
use std::cmp::Ordering;
820-
let mut iter = AnsiCodeIterator::new(s);
821-
let mut length = 0;
822-
let mut rv = None;
823-
824-
while let Some(item) = iter.next() {
825-
match item {
826-
(s, false) => {
827-
if rv.is_none() {
828-
if str_width(s) + length > width - str_width(tail) {
829-
let ts = iter.current_slice();
830-
831-
let mut s_byte = 0;
832-
let mut s_width = 0;
833-
let rest_width = width - str_width(tail) - length;
834-
for c in s.chars() {
835-
s_byte += c.len_utf8();
836-
s_width += char_width(c);
837-
match s_width.cmp(&rest_width) {
838-
Ordering::Equal => break,
839-
Ordering::Greater => {
840-
s_byte -= c.len_utf8();
841-
break;
842-
}
843-
Ordering::Less => continue,
844-
}
845-
}
846-
847-
let idx = ts.len() - s.len() + s_byte;
848-
let mut buf = ts[..idx].to_string();
849-
buf.push_str(tail);
850-
rv = Some(buf);
851-
}
852-
length += str_width(s);
824+
let mut pos = 0; // Current search index by width
825+
let mut code_iter = AnsiCodeIterator::new(s).peekable();
826+
827+
// Search for the beginning of the slice while collecting heading ANSI
828+
// codes
829+
let mut front_ansi = String::new(); // ANSI codes found before bound start
830+
let mut slice_start = 0; // Current search index by bytes
831+
832+
// Extract the leading slice, which *may be mutated* to remove just its first character.
833+
'search_slice_start: while pos < bounds.start {
834+
let Some((sub, is_ansi)) = code_iter.peek_mut() else {
835+
break;
836+
};
837+
838+
if *is_ansi {
839+
// Keep track of leading ANSI for later output.
840+
front_ansi.push_str(sub);
841+
slice_start += sub.len();
842+
} else {
843+
for (c_idx, c) in sub.char_indices() {
844+
if pos >= bounds.start {
845+
// Ensure we don't drop the remainder of the slice before searching for the
846+
// end bound.
847+
*sub = &sub[c_idx..];
848+
break 'search_slice_start;
853849
}
850+
851+
pos += char_width(c);
852+
slice_start += c.len_utf8();
854853
}
855-
(s, true) => {
856-
if let Some(ref mut rv) = rv {
857-
rv.push_str(s);
858-
}
854+
}
855+
856+
code_iter.next();
857+
}
858+
859+
// Search for the end of the slice. This loop is a bit simpler because we don't need to
860+
// keep track of remaining characters if we cut in the middle of a non-ANSI slice.
861+
let mut slice_end = slice_start;
862+
863+
'search_slice_end: for (sub, is_ansi) in &mut code_iter {
864+
if is_ansi {
865+
// Keep ANSI in the output slice but don't account for them in the total width.
866+
slice_end += sub.len();
867+
continue;
868+
}
869+
870+
for c in sub.chars() {
871+
let c_width = char_width(c);
872+
873+
if pos + c_width > bounds.end {
874+
// We will only search for ANSI codes after breaking this
875+
// loop, so we can safely drop the remainder of `sub`
876+
break 'search_slice_end;
859877
}
878+
879+
pos += c_width;
880+
slice_end += c.len_utf8();
860881
}
861882
}
862883

863-
if let Some(buf) = rv {
864-
Cow::Owned(buf)
865-
} else {
866-
Cow::Borrowed(s)
884+
// Initialise the result (before appending remaining ANSI slices)
885+
let slice = &s[slice_start..slice_end];
886+
887+
let mut result = {
888+
if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
889+
// Nothing to prepend or append, so the slice can be borrowed here without allocating.
890+
Cow::Borrowed(slice)
891+
} else {
892+
Cow::Owned(front_ansi + head + slice + tail)
893+
}
894+
};
895+
896+
// Push back remaining ANSI codes to result
897+
for (sub, is_ansi) in code_iter {
898+
if is_ansi {
899+
result.to_mut().push_str(sub);
900+
}
867901
}
868-
}
869902

903+
result
904+
}
870905
#[cfg(not(feature = "ansi-parsing"))]
871906
{
872-
if s.len() <= width - tail.len() {
873-
Cow::Borrowed(s)
907+
let slice = s.get(bounds).unwrap_or("");
908+
909+
if head.is_empty() && tail.is_empty() {
910+
Cow::Borrowed(slice)
874911
} else {
875-
Cow::Owned(format!(
876-
"{}{}",
877-
s.get(..width - tail.len()).unwrap_or_default(),
878-
tail
879-
))
912+
Cow::Owned(format!("{head}{slice}{tail}"))
880913
}
881914
}
882915
}
883916

917+
/// Truncates a string to a certain number of characters.
918+
///
919+
/// This ensures that escape codes are not screwed up in the process.
920+
/// If the maximum length is hit the string will be truncated but
921+
/// escape codes will still be honored. If truncation takes place
922+
/// the tail string will be appended.
923+
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
924+
let tail_width = measure_text_width(tail);
925+
slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
926+
}
927+
884928
/// Pads a string to fill a certain number of characters.
885929
///
886930
/// This will honor ansi codes correctly and allows you to align a string
@@ -1000,11 +1044,64 @@ fn test_truncate_str() {
10001044
);
10011045
}
10021046

1047+
#[test]
1048+
fn test_slice_ansi_str() {
1049+
// Note that 🐶 is two columns wide
1050+
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
1051+
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
1052+
1053+
assert_eq!(
1054+
slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
1055+
format!(">>>{test_str}<<<"),
1056+
);
1057+
1058+
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
1059+
assert_eq!(measure_text_width(test_str), 16);
1060+
1061+
assert_eq!(
1062+
slice_str(test_str, "", 5..5, ""),
1063+
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
1064+
);
1065+
1066+
assert_eq!(
1067+
slice_str(test_str, "", 0..5, ""),
1068+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1069+
);
1070+
1071+
assert_eq!(
1072+
slice_str(test_str, "", 0..6, ""),
1073+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1074+
);
1075+
1076+
assert_eq!(
1077+
slice_str(test_str, "", 0..7, ""),
1078+
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
1079+
);
1080+
1081+
assert_eq!(
1082+
slice_str(test_str, "", 4..9, ""),
1083+
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
1084+
);
1085+
1086+
assert_eq!(
1087+
slice_str(test_str, "", 7..21, ""),
1088+
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
1089+
);
1090+
1091+
assert_eq!(
1092+
slice_str(test_str, ">>>", 7..21, "<<<"),
1093+
"\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
1094+
);
1095+
}
1096+
}
1097+
10031098
#[test]
10041099
fn test_truncate_str_no_ansi() {
1100+
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo ba!");
10051101
assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
10061102
assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
10071103
assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");
1104+
assert_eq!(&truncate_str("foo bar", 2, "!!!"), "!!!");
10081105
}
10091106

10101107
#[test]

0 commit comments

Comments
 (0)