Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[red-knot] Error out when an mdtest code block is unterminated #14965

Merged
merged 5 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/red_knot_test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const MDTEST_TEST_FILTER: &str = "MDTEST_TEST_FILTER";
#[allow(clippy::print_stdout)]
pub fn run(path: &Utf8Path, long_title: &str, short_title: &str, test_name: &str) {
let source = std::fs::read_to_string(path).unwrap();
let suite = match test_parser::parse(short_title, &source) {
let suite = match test_parser::parse(short_title, &source, path) {
Ok(suite) => suite,
Err(err) => {
panic!("Error parsing `{path}`: {err:?}")
Expand Down
113 changes: 92 additions & 21 deletions crates/red_knot_test/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
use std::sync::LazyLock;

use anyhow::bail;
use camino::Utf8Path;
use memchr::memchr2;
use regex::{Captures, Match, Regex};
use rustc_hash::{FxHashMap, FxHashSet};

use ruff_index::{newtype_index, IndexVec};
use ruff_python_trivia::Cursor;
use ruff_text_size::{TextLen, TextSize};
use ruff_source_file::LineRanges;
use ruff_text_size::{TextLen, TextRange, TextSize};

use crate::config::MarkdownTestConfig;

/// Parse the Markdown `source` as a test suite with given `title`.
pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
let parser = Parser::new(title, source);
pub(crate) fn parse<'s>(
title: &'s str,
source: &'s str,
path: &'s Utf8Path,
) -> anyhow::Result<MarkdownTestSuite<'s>> {
let parser = Parser::new(title, source, path);
parser.parse()
}

Expand Down Expand Up @@ -156,8 +162,14 @@ static HEADER_RE: LazyLock<Regex> =
/// Matches a code block fenced by triple backticks, possibly with language and `key=val`
/// configuration items following the opening backticks (in the "tag string" of the code block).
static CODE_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^```(?<lang>(?-u:\w)+)?(?<config>(?: +\S+)*)\s*\n(?<code>(?:.|\n)*?)\n?```\s*\n?")
.unwrap()
Regex::new(
r"(?x)
^```(?<lang>(?-u:\w)+)?(?<config>(?:\x20+\S+)*)\s*\n
(?<code>(?:.|\n)*?)\n?
(?<end>```|\z)
",
)
.unwrap()
});

#[derive(Debug)]
Expand Down Expand Up @@ -202,7 +214,9 @@ struct Parser<'s> {
/// The unparsed remainder of the Markdown source.
cursor: Cursor<'s>,

source: &'s str,
source_len: TextSize,
source_path: &'s Utf8Path,

/// Stack of ancestor sections.
stack: SectionStack,
Expand All @@ -215,7 +229,7 @@ struct Parser<'s> {
}

impl<'s> Parser<'s> {
fn new(title: &'s str, source: &'s str) -> Self {
fn new(title: &'s str, source: &'s str, path: &'s Utf8Path) -> Self {
let mut sections = IndexVec::default();
let root_section_id = sections.push(Section {
title,
Expand All @@ -227,7 +241,9 @@ impl<'s> Parser<'s> {
sections,
files: IndexVec::default(),
cursor: Cursor::new(source),
source,
source_len: source.text_len(),
source_path: path,
stack: SectionStack::new(root_section_id),
current_section_files: None,
current_section_has_config: false,
Expand Down Expand Up @@ -328,6 +344,16 @@ impl<'s> Parser<'s> {
// We never pop the implicit root section.
let section = self.stack.top();

if captures.name("end").unwrap().is_empty() {
let code_block_start = self.cursor.token_len();
let row = self.source.count_lines(TextRange::up_to(code_block_start)) + 1;

return Err(anyhow::anyhow!(
"Unterminated code block at: {}:{row}:0",
self.source_path
));
}

let mut config: FxHashMap<&'s str, &'s str> = FxHashMap::default();

if let Some(config_match) = captures.name("config") {
Expand Down Expand Up @@ -422,11 +448,18 @@ impl<'s> Parser<'s> {

#[cfg(test)]
mod tests {
use crate::parser::MarkdownTestSuite;
use camino::Utf8Path;
use ruff_python_trivia::textwrap::dedent;

fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
let path = Utf8Path::new("/home/project/file.md");
super::parse(title, source, path)
}

#[test]
fn empty() {
let mf = super::parse("file.md", "").unwrap();
let mf = parse("file.md", "").unwrap();

assert!(mf.tests().next().is_none());
}
Expand All @@ -440,7 +473,7 @@ mod tests {
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand All @@ -465,7 +498,7 @@ mod tests {
x = 1
```",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand Down Expand Up @@ -499,7 +532,7 @@ mod tests {
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test1, test2] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected two tests");
Expand Down Expand Up @@ -546,7 +579,7 @@ mod tests {
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test1, test2] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected two tests");
Expand Down Expand Up @@ -585,7 +618,7 @@ mod tests {
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand All @@ -609,7 +642,7 @@ mod tests {
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand All @@ -630,7 +663,7 @@ mod tests {
",
);

let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand All @@ -652,7 +685,7 @@ mod tests {
",
);

let mf = super::parse("file.md", &source).unwrap();
let mf = parse("file.md", &source).unwrap();

let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
Expand All @@ -664,6 +697,44 @@ mod tests {
assert_eq!(file.code, "x = 10");
}

#[test]
fn unterminated_code_block_1() {
let source = dedent(
"
```
x = 1
",
);
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Unterminated code block at: /home/project/file.md:2:0"
);
}

#[test]
fn unterminated_code_block_2() {
let source = dedent(
"
## A well-fenced block

```
y = 2
```

## A not-so-well-fenced block

```
x = 1
",
);
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Unterminated code block at: /home/project/file.md:10:0"
);
}

#[test]
fn no_header_inside_test() {
let source = dedent(
Expand All @@ -677,7 +748,7 @@ mod tests {
## Two
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Header 'Two' not valid inside a test case; parent 'One' has code files."
Expand All @@ -693,7 +764,7 @@ mod tests {
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Invalid config item `foo`.");
}

Expand All @@ -706,7 +777,7 @@ mod tests {
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Invalid config item `foo=bar=baz`.");
}

Expand All @@ -719,7 +790,7 @@ mod tests {
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Duplicate config item `foo=baz`.");
}

Expand All @@ -736,7 +807,7 @@ mod tests {
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Test `file.md` has duplicate files named `test.py`. \
Expand All @@ -758,7 +829,7 @@ mod tests {
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
let err = parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Test `file.md` has duplicate files named `foo.py`."
Expand Down
Loading