From 09355c6c21fc5062e28ff3320b3b8fb9c606d27c Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sat, 11 Jan 2025 14:50:27 -0800 Subject: [PATCH] Make Windows-1252 detection stricter --- src/files.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/files.rs b/src/files.rs index ec3ac6592b..51c4b7633e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -227,7 +227,14 @@ pub(crate) fn guess_content(bytes: &[u8]) -> ProbableFileKind { // ISO-8859-1 aka Latin 1), treat them as such. let (latin1_str, _encoding, saw_malformed) = encoding_rs::WINDOWS_1252.decode(bytes); if !saw_malformed { - return ProbableFileKind::Text(latin1_str.to_string()); + let num_null = utf16_string + .chars() + .take(5000) + .filter(|c| *c == '\0') + .count(); + if num_null <= 1 { + return ProbableFileKind::Text(latin1_str.to_string()); + } } ProbableFileKind::Binary