Skip to content

Commit

Permalink
Ensure line splitting distinguishes "foo" and "foo\n"
Browse files Browse the repository at this point in the history
We rely on being able to split lines and rejoin them to obtain the
original string. `str::lines()` in the Rust stdlib does not have this
property.

This was causing crashes in word-diffing on textual diffing, where
code paths differed on the number of lines they thought a string had.

This was broken in 8b84238.

Fixes #688.
  • Loading branch information
Wilfred committed Jul 20, 2024
1 parent efe1b10 commit ffe27c5
Show file tree
Hide file tree
Showing 10 changed files with 166 additions and 25 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
## 0.59 (unreleased)

### Diffing

Fixed crash on some textual files where a single change contained more than
1,000 words.

### Parsing

Added support for device tree and F#.
Expand Down
7 changes: 7 additions & 0 deletions sample_files/big_text_hunk_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
49 changes: 49 additions & 0 deletions sample_files/big_text_hunk_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20220708220712-1185a9018129/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
7 changes: 5 additions & 2 deletions sample_files/compare.expected
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ sample_files/b2_math_1.h sample_files/b2_math_2.h
sample_files/bad_combine_1.rs sample_files/bad_combine_2.rs
f5051bf7d2b8afa3a677388cbd458891 -

sample_files/big_text_hunk_1.txt sample_files/big_text_hunk_2.txt
fd0c8912c094097f82c6b29ae66fb912 -

sample_files/change_outer_1.el sample_files/change_outer_2.el
2b9334a4cc72da63bba28eff958f0038 -

Expand Down Expand Up @@ -140,7 +143,7 @@ sample_files/makefile_1.mk sample_files/makefile_2.mk
d0572210b5121ce68ac0ce45e43b922b -

sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt
615de4b145b7b161e4fb285728280ed1 -
52ca05855e520876479e6f608c5e7831 -

sample_files/metadata_1.clj sample_files/metadata_2.clj
4b58ce366467c8cca46db53508e81323 -
Expand Down Expand Up @@ -293,5 +296,5 @@ sample_files/yaml_1.yaml sample_files/yaml_2.yaml
f068239fc7bade0e6de96d81136c1ac5 -

sample_files/zig_1.zig sample_files/zig_2.zig
4516796003b81f35bfa57d84bb7c0cbe -
e36d1ea126b8b68e3344434bb63f205e -

18 changes: 12 additions & 6 deletions src/display/inline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
use crate::{
constants::Side,
display::context::{calculate_after_context, calculate_before_context, opposite_positions},
display::hunks::Hunk,
display::style::{self, apply_colors, apply_line_number_color},
lines::{format_line_num, MaxLine},
display::{
context::{calculate_after_context, calculate_before_context, opposite_positions},
hunks::Hunk,
style::{self, apply_colors, apply_line_number_color},
},
lines::{format_line_num, split_on_newlines, MaxLine},
options::DisplayOptions,
parse::syntax::MatchedPos,
summary::FileFormat,
Expand Down Expand Up @@ -43,8 +45,12 @@ pub(crate) fn print(
)
} else {
(
lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
split_on_newlines(lhs_src)
.map(|s| format!("{}\n", s))
.collect(),
split_on_newlines(rhs_src)
.map(|s| format!("{}\n", s))
.collect(),
)
};

Expand Down
39 changes: 29 additions & 10 deletions src/display/side_by_side.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@ use owo_colors::{OwoColorize, Style};

use crate::{
constants::Side,
display::context::all_matched_lines_filled,
display::hunks::{matched_lines_indexes_for_hunk, Hunk},
display::style::{
self, apply_colors, apply_line_number_color, color_positions, novel_style, replace_tabs,
split_and_apply, BackgroundColor,
display::{
context::all_matched_lines_filled,
hunks::{matched_lines_indexes_for_hunk, Hunk},
style::{
self, apply_colors, apply_line_number_color, color_positions, novel_style,
replace_tabs, split_and_apply, BackgroundColor,
},
},
hash::DftHashMap,
lines::format_line_num,
lines::{format_line_num, split_on_newlines},
options::{DisplayMode, DisplayOptions},
parse::syntax::{zip_pad_shorter, MatchedPos},
summary::FileFormat,
Expand Down Expand Up @@ -338,8 +340,12 @@ pub(crate) fn print(
)
} else {
(
lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
split_on_newlines(lhs_src)
.map(|s| format!("{}\n", s))
.collect(),
split_on_newlines(rhs_src)
.map(|s| format!("{}\n", s))
.collect(),
)
};

Expand Down Expand Up @@ -401,8 +407,21 @@ pub(crate) fn print(
let mut prev_lhs_line_num = None;
let mut prev_rhs_line_num = None;

let lhs_lines = lhs_src.lines().collect::<Vec<_>>();
let rhs_lines = rhs_src.lines().collect::<Vec<_>>();
let mut lhs_lines = split_on_newlines(lhs_src).collect::<Vec<_>>();
let mut rhs_lines = split_on_newlines(rhs_src).collect::<Vec<_>>();

// If "foo" is one line, is "foo\n" two lines? Generally we want
// to care about newlines when deciding whether content differs.
//
// Ending a file with a trailing newline is extremely common
// though. If both files have a trailing newline, consider "foo\n"
// to be "foo" so we don't end up displaying a blank line on both
// sides.
if lhs_lines.last() == Some(&"") && rhs_lines.last() == Some(&"") {
lhs_lines.pop();
rhs_lines.pop();
}

let matched_lines = all_matched_lines_filled(lhs_mps, rhs_mps, &lhs_lines, &rhs_lines);
let mut matched_lines_to_print = &matched_lines[..];

Expand Down
3 changes: 2 additions & 1 deletion src/display/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use line_numbers::SingleLineSpan;
use owo_colors::{OwoColorize, Style};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

use crate::lines::split_on_newlines;
use crate::parse::syntax::StringKind;
use crate::{
constants::Side,
Expand Down Expand Up @@ -401,7 +402,7 @@ pub(crate) fn apply_colors(
positions: &[MatchedPos],
) -> Vec<String> {
let styles = color_positions(side, background, syntax_highlight, file_format, positions);
let lines = s.lines().collect::<Vec<_>>();
let lines = split_on_newlines(s).collect::<Vec<_>>();
style_lines(&lines, &styles)
}

Expand Down
49 changes: 49 additions & 0 deletions src/lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,21 @@ impl<S: AsRef<str>> MaxLine for S {
}
}

/// Split `s` on \n or \r\n. Always returns a non-empty vec. Each line
/// in the vec does not include the trailing newline.
///
/// This differs from `str::lines`, which considers `""` to be zero
/// lines and `"foo\n"` to be one line.
pub(crate) fn split_on_newlines(s: &str) -> impl Iterator<Item = &str> {
s.split('\n').map(|l| {
if let Some(l) = l.strip_suffix('\r') {
l
} else {
l
}
})
}

pub(crate) fn is_all_whitespace(s: &str) -> bool {
s.chars().all(|c| c.is_whitespace())
}
Expand Down Expand Up @@ -66,6 +81,40 @@ mod tests {
assert_eq!(line.max_line().0, 1);
}

#[test]
fn test_split_line_empty() {
assert_eq!(split_on_newlines("").collect::<Vec<_>>(), vec![""]);
}

#[test]
fn test_split_line_single() {
assert_eq!(split_on_newlines("foo").collect::<Vec<_>>(), vec!["foo"]);
}

#[test]
fn test_split_line_with_newline() {
assert_eq!(
split_on_newlines("foo\nbar").collect::<Vec<_>>(),
vec!["foo", "bar"]
);
}

#[test]
fn test_split_line_with_crlf() {
assert_eq!(
split_on_newlines("foo\r\nbar").collect::<Vec<_>>(),
vec!["foo", "bar"]
);
}

#[test]
fn test_split_line_with_trailing_newline() {
assert_eq!(
split_on_newlines("foo\nbar\n").collect::<Vec<_>>(),
vec!["foo", "bar", ""]
);
}

#[test]
fn test_is_all_whitespace() {
assert!(is_all_whitespace(" \n\t"));
Expand Down
8 changes: 5 additions & 3 deletions src/parse/guess_language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ pub(crate) fn language_name(language: Language) -> &'static str {

use Language::*;

use crate::lines::split_on_newlines;

/// File globs that identify languages based on the file path.
pub(crate) fn language_globs(language: Language) -> Vec<glob::Pattern> {
let glob_strs: &'static [&'static str] = match language {
Expand Down Expand Up @@ -420,7 +422,7 @@ fn looks_like_hacklang(path: &Path, src: &str) -> bool {
fn looks_like_objc(path: &Path, src: &str) -> bool {
if let Some(extension) = path.extension() {
if extension == "h" {
return src.lines().take(100).any(|line| {
return split_on_newlines(src).take(100).any(|line| {
["#import", "@interface", "@protocol"]
.iter()
.any(|keyword| line.starts_with(keyword))
Expand Down Expand Up @@ -497,7 +499,7 @@ fn from_emacs_mode_header(src: &str) -> Option<Language> {

// Emacs allows the mode header to occur on the second line if the
// first line is a shebang.
for line in src.lines().take(2) {
for line in split_on_newlines(src).take(2) {
let mode_name: String = match (MODE_RE.captures(line), SHORTHAND_RE.captures(line)) {
(Some(cap), _) | (_, Some(cap)) => cap[1].into(),
_ => "".into(),
Expand Down Expand Up @@ -559,7 +561,7 @@ fn from_shebang(src: &str) -> Option<Language> {
lazy_static! {
static ref RE: Regex = Regex::new(r"#! *(?:/usr/bin/env )?([^ ]+)").unwrap();
}
if let Some(first_line) = src.lines().next() {
if let Some(first_line) = split_on_newlines(src).next() {
if let Some(cap) = RE.captures(first_line) {
let interpreter_path = Path::new(&cap[1]);
if let Some(name) = interpreter_path.file_name() {
Expand Down
6 changes: 3 additions & 3 deletions src/parse/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use line_numbers::SingleLineSpan;
use typed_arena::Arena;

use self::Syntax::*;
use crate::lines::split_on_newlines;
use crate::words::split_words_and_numbers;
use crate::{
diff::changes::ChangeKind,
Expand Down Expand Up @@ -408,9 +409,8 @@ fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>)
..
} => {
let is_comment = *highlight == AtomKind::Comment;
let clean_content = if is_comment && content.lines().count() > 1 {
content
.lines()
let clean_content = if is_comment && split_on_newlines(content).count() > 1 {
split_on_newlines(content)
.map(|l| l.trim_start())
.collect::<Vec<_>>()
.join("\n")
Expand Down

0 comments on commit ffe27c5

Please sign in to comment.