Skip to content

Commit

Permalink
Auto merge of rust-lang#137027 - hkBst:single_pass_skip_ws, r=<try>
Browse files Browse the repository at this point in the history
Make skip_whitespace do a single pass

Offshoot from rust-lang#136931 that I'd like to see the performance impact of.

r? `@nnethercote`
  • Loading branch information
bors committed Feb 14, 2025
2 parents bdc97d1 + d15cd2e commit a13e5e9
Showing 1 changed file with 29 additions and 16 deletions.
45 changes: 29 additions & 16 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,10 @@ where
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\\' => {
match chars.clone().next() {
let mut chars_clone = chars.clone();
match chars_clone.next() {
Some('\n') => {
chars = chars_clone;
// Rust language specification requires us to skip whitespaces
// if unescaped '\' character is followed by '\n'.
// For details see [Rust language reference]
Expand All @@ -379,30 +381,41 @@ where
}
}

/// Skip ASCII whitespace, except for the formfeed character
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
/// Warns on unescaped newline and following non-ASCII whitespace.
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
where
F: FnMut(Range<usize>, EscapeError),
{
let tail = chars.as_str();
let first_non_space = tail
.bytes()
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
.unwrap_or(tail.len());
if tail[1..first_non_space].contains('\n') {
// The +1 accounts for the escaping slash.
let end = start + first_non_space + 1;
// the escaping slash and newline characters add 2 bytes
let mut end = start + 2;
let mut contains_nl = false;

// manual next_if loop
let mut next_char;
loop {
let mut chars_clone = chars.clone();
next_char = chars_clone.next();
match next_char {
Some(c) if c.is_ascii_whitespace() && c != '\x0c' => {
*chars = chars_clone;
end += 1;
contains_nl |= c == '\n';
}
_ => break,
}
}

if contains_nl {
callback(start..end, EscapeError::MultipleSkippedLinesWarning);
}
let tail = &tail[first_non_space..];
if let Some(c) = tail.chars().next() {
if let Some(c) = next_char {
if c.is_whitespace() {
// For error reporting, we would like the span to contain the character that was not
// skipped. The +1 is necessary to account for the leading \ that started the escape.
let end = start + first_non_space + c.len_utf8() + 1;
callback(start..end, EscapeError::UnskippedWhitespaceWarning);
// for error reporting, include the character that was not skipped in the span
callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning);
}
}
*chars = tail.chars();
}

/// Takes a contents of a string literal (without quotes) and produces a
Expand Down

0 comments on commit a13e5e9

Please sign in to comment.