From 9e2092b80a8e36d2613df9d5aff7ca7d61d7802e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Dupr=C3=A9?= Date: Wed, 7 Feb 2024 18:14:16 +0100 Subject: [PATCH] Add new function ansi::slice_ansi_str --- src/lib.rs | 4 +- src/utils.rs | 215 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 159 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2d3114b0..dc0459ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,8 +84,8 @@ pub use crate::term::{ }; pub use crate::utils::{ colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with, - set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute, - Color, Emoji, Style, StyledObject, + set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment, + Attribute, Color, Emoji, Style, StyledObject, }; #[cfg(feature = "ansi-parsing")] diff --git a/src/utils.rs b/src/utils.rs index 11d885a2..0b41bb6c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::env; use std::fmt; use std::fmt::{Debug, Formatter}; +use std::ops::Range; use std::sync::atomic::{AtomicBool, Ordering}; use once_cell::sync::Lazy; @@ -807,77 +808,124 @@ pub(crate) fn char_width(_c: char) -> usize { 1 } -/// Truncates a string to a certain number of characters. +/// Slice a `&str` in terms of text width. This means that only the text +/// columns strictly between `start` and `stop` will be kept. /// -/// This ensures that escape codes are not screwed up in the process. -/// If the maximum length is hit the string will be truncated but -/// escapes code will still be honored. If truncation takes place -/// the tail string will be appended. -pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { - if measure_text_width(s) <= width { - return Cow::Borrowed(s); - } - +/// If a multi-columns character overlaps with the end of the interval it will +/// not be included. In such a case, the result will be less than `end - start` +/// columns wide. +/// +/// This ensures that escape codes are not screwed up in the process. And if +/// non-empty head and tail are specified, they are inserted between the ANSI +/// codes from truncated bounds and the slice. +pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range, tail: &str) -> Cow<'a, str> { #[cfg(feature = "ansi-parsing")] { - use std::cmp::Ordering; - let mut iter = AnsiCodeIterator::new(s); - let mut length = 0; - let mut rv = None; - - while let Some(item) = iter.next() { - match item { - (s, false) => { - if rv.is_none() { - if str_width(s) + length > width.saturating_sub(str_width(tail)) { - let ts = iter.current_slice(); - - let mut s_byte = 0; - let mut s_width = 0; - let rest_width = width.saturating_sub(str_width(tail)).saturating_sub(length); - for c in s.chars() { - s_byte += c.len_utf8(); - s_width += char_width(c); - match s_width.cmp(&rest_width) { - Ordering::Equal => break, - Ordering::Greater => { - s_byte -= c.len_utf8(); - break; - } - Ordering::Less => continue, - } - } - - let idx = ts.len() - s.len() + s_byte; - let mut buf = ts[..idx].to_string(); - buf.push_str(tail); - rv = Some(buf); - } - length += str_width(s); + let mut pos = 0; // Current search index by width + let mut code_iter = AnsiCodeIterator::new(s).peekable(); + + // Search for the begining of the slice while collecting heading ANSI + // codes + let mut front_ansi = String::new(); // ANSI codes found before bound start + let mut slice_start = 0; // Current search index by bytes + + // Extract the leading slice, which *may be mutated* to remove just its first character. + 'search_slice_start: while pos < bounds.start { + let Some((sub, is_ansi)) = code_iter.peek_mut() else { + break; + }; + + if *is_ansi { + // Keep track of leading ANSI for later output. + front_ansi.push_str(sub); + slice_start += sub.len(); + } else { + for (c_idx, c) in sub.char_indices() { + if pos >= bounds.start { + // Ensure we don't drop the remaining of the slice before searching for the + // end bound. + *sub = &sub[c_idx..]; + break 'search_slice_start; } + + pos += char_width(c); + slice_start += c.len_utf8(); } - (s, true) => { - if let Some(ref mut rv) = rv { - rv.push_str(s); - } + } + + code_iter.next(); + } + + // Search for the end of the slice. This loop is a bit simpler because we don't need to + // keep track of remaining characters if we cut in the middle of a non-ANSI slice. + let mut slice_end = slice_start; + + 'search_slice_end: for (sub, is_ansi) in &mut code_iter { + if is_ansi { + // Keep ANSI in the output slice but don't account for them in the total width. + slice_end += sub.len(); + continue; + } + + for c in sub.chars() { + let c_width = char_width(c); + + if pos + c_width > bounds.end { + // We will only search for ANSI codes after breaking this + // loop, so we can safely drop the remaining of `sub` + break 'search_slice_end; } + + pos += c_width; + slice_end += c.len_utf8(); } } - if let Some(buf) = rv { - Cow::Owned(buf) - } else { - Cow::Borrowed(s) + // Initialise the result (before appending remaining ANSI slices) + let slice = &s[slice_start..slice_end]; + + let mut result = { + if front_ansi.is_empty() && head.is_empty() && tail.is_empty() { + // No allocation may have to be performed if there are no bounds. + Cow::Borrowed(slice) + } else { + Cow::Owned(front_ansi + head + slice + tail) + } + }; + + // Push back remaining ANSI codes to result + for (sub, is_ansi) in code_iter { + if is_ansi { + result.to_mut().push_str(sub); + } } - } + result + } #[cfg(not(feature = "ansi-parsing"))] { - Cow::Owned(format!( - "{}{}", - &s[..width.saturating_sub(tail.len())], - tail - )) + let slice = s.get(bounds).unwrap_or(""); + + if head.is_empty() && tail.is_empty() { + Cow::Borrowed(slice) + } else { + Cow::Owned(format!("{head}{slice}{tail}")) + } + } +} + +/// Truncates a string to a certain number of characters. +/// +/// This ensures that escape codes are not screwed up in the process. +/// If the maximum length is hit the string will be truncated but +/// escapes code will still be honored. If truncation takes place +/// the tail string will be appended. +pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { + if measure_text_width(s) <= width { + Cow::Borrowed(s) + } else { + let tail_width = measure_text_width(tail); + slice_str(s, "", 0..width.saturating_sub(tail_width), tail) } } @@ -1005,6 +1053,57 @@ fn test_truncate_str() { ); } +#[test] +fn test_slice_ansi_str() { + // Note that 🐶 is two columns wide + let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; + assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str); + + assert_eq!( + slice_str(test_str, ">>>", 0..test_str.len(), "<<<"), + format!(">>>{test_str}<<<"), + ); + + if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") { + assert_eq!(measure_text_width(test_str), 16); + + assert_eq!( + slice_str(test_str, "", 5..5, ""), + "\u{1b}[31m\u{1b}[1m\u{1b}[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..5, ""), + "Hello\x1b[31m\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..6, ""), + "Hello\x1b[31m\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 0..7, ""), + "Hello\x1b[31m🐶\x1b[1m\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 4..9, ""), + "o\x1b[31m🐶\x1b[1m🐶\x1b[0m" + ); + + assert_eq!( + slice_str(test_str, "", 7..21, ""), + "\x1b[31m\x1b[1m🐶\x1b[0m world!" + ); + + assert_eq!( + slice_str(test_str, ">>>", 7..21, "<<<"), + "\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<" + ); + } +} + #[test] fn test_truncate_str_no_ansi() { assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");