From 23a43e69ff4859d22dba65a8b92eeae8d4c841b7 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Mon, 5 May 2025 08:22:14 +0200 Subject: [PATCH 1/6] fix(lexer): handle single --- crates/pgt_lexer/src/lib.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 32bbdd42..35ff341d 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -66,7 +66,8 @@ static PATTERN_LEXER: LazyLock = LazyLock::new(|| { #[cfg(windows)] { // On Windows, treat \r\n as a single newline token - Regex::new(r"(?P +)|(?P(\r\n|\n)+)|(?P\t+)").unwrap() + // and treat \r as a whitespace token + Regex::new(r"(?P (+|\r))|(?P(\r\n|\n)+)|(?P\t+)").unwrap() } #[cfg(not(windows))] { @@ -206,6 +207,15 @@ mod tests { assert_eq!(tokens[1].kind, SyntaxKind::Tab); } + #[test] + #[cfg(windows)] + fn test_carriage_return() { + let input = "select\r\n\r1"; + let tokens = lex(input).unwrap(); + assert_eq!(tokens[1].kind, SyntaxKind::Newline); + assert_eq!(tokens[2].kind, SyntaxKind::Whitespace); + } + #[test] fn test_newline_tokens() { let input = "select\n1"; @@ -217,7 +227,7 @@ mod tests { fn test_consecutive_newlines() { // Test with multiple consecutive newlines #[cfg(windows)] - let input = "select\r\n\r\n1"; + let input = "select\r\n\r\n\r1"; #[cfg(not(windows))] let input = "select\n\n1"; @@ -226,6 +236,7 @@ mod tests { // Check that we have exactly one newline token between "select" and "1" assert_eq!(tokens[0].kind, SyntaxKind::Select); assert_eq!(tokens[1].kind, SyntaxKind::Newline); + assert_eq!(tokens[1].kind, SyntaxKind::Whitespace); assert_eq!(tokens[2].kind, SyntaxKind::Iconst); } From fd4e8007977adf3434b82bad886b0422ff8a8c66 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Mon, 5 May 2025 08:32:53 +0200 Subject: [PATCH 2/6] ups --- crates/pgt_lexer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 35ff341d..50c8b670 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -229,7 +229,7 @@ mod tests { #[cfg(windows)] let input = "select\r\n\r\n\r1"; #[cfg(not(windows))] - let input = "select\n\n1"; + let input = "select\n\n 1"; let tokens = lex(input).unwrap(); From 95140ae3420a2ba92cbff844bc59400bd000ec39 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Mon, 5 May 2025 08:40:50 +0200 Subject: [PATCH 3/6] ups --- crates/pgt_lexer/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 50c8b670..c6b768d2 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -236,8 +236,8 @@ mod tests { // Check that we have exactly one newline token between "select" and "1" assert_eq!(tokens[0].kind, SyntaxKind::Select); assert_eq!(tokens[1].kind, SyntaxKind::Newline); - assert_eq!(tokens[1].kind, SyntaxKind::Whitespace); - assert_eq!(tokens[2].kind, SyntaxKind::Iconst); + assert_eq!(tokens[2].kind, SyntaxKind::Whitespace); + assert_eq!(tokens[3].kind, SyntaxKind::Iconst); } #[test] From 1e49a149286b92c176e117271b944ed0ccfc4cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Steinr=C3=B6tter?= Date: Mon, 5 May 2025 09:30:41 +0200 Subject: [PATCH 4/6] Update lib.rs --- crates/pgt_lexer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index c6b768d2..819e409a 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -67,7 +67,7 @@ static PATTERN_LEXER: LazyLock = LazyLock::new(|| { { // On Windows, treat \r\n as a single newline token // and treat \r as a whitespace token - Regex::new(r"(?P (+|\r))|(?P(\r\n|\n)+)|(?P\t+)").unwrap() + Regex::new(r"(?P ( |\r)+)|(?P(\r\n|\n)+)|(?P\t+)").unwrap() } #[cfg(not(windows))] { From 9a9953080964d85fcf95a9ed0772ccf85a7e104c Mon Sep 17 00:00:00 2001 From: psteinroe Date: Mon, 5 May 2025 09:55:24 +0200 Subject: [PATCH 5/6] make it a newline --- crates/pgt_lexer/src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 819e409a..677ca869 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -67,7 +67,7 @@ static PATTERN_LEXER: LazyLock = LazyLock::new(|| { { // On Windows, treat \r\n as a single newline token // and treat \r as a whitespace token - Regex::new(r"(?P ( |\r)+)|(?P(\r\n|\n)+)|(?P\t+)").unwrap() + Regex::new(r"(?P +)|(?P(\r\n|\n|\r)+)|(?P\t+)").unwrap() } #[cfg(not(windows))] { @@ -236,8 +236,7 @@ mod tests { // Check that we have exactly one newline token between "select" and "1" assert_eq!(tokens[0].kind, SyntaxKind::Select); assert_eq!(tokens[1].kind, SyntaxKind::Newline); - assert_eq!(tokens[2].kind, SyntaxKind::Whitespace); - assert_eq!(tokens[3].kind, SyntaxKind::Iconst); + assert_eq!(tokens[2].kind, SyntaxKind::Iconst); } #[test] From 41e5196ef5f6c63eb9823b4485cbeb1e9a28f604 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Mon, 5 May 2025 10:19:53 +0200 Subject: [PATCH 6/6] fix --- crates/pgt_lexer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 677ca869..44fbca94 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -229,7 +229,7 @@ mod tests { #[cfg(windows)] let input = "select\r\n\r\n\r1"; #[cfg(not(windows))] - let input = "select\n\n 1"; + let input = "select\n\n1"; let tokens = lex(input).unwrap();