From 45cdfffbca459b463dea2f4b2e37081ad9de1a1b Mon Sep 17 00:00:00 2001 From: Augustin Mauroy <97875033+AugustinMauroy@users.noreply.github.com> Date: Tue, 25 Feb 2025 22:47:01 +0100 Subject: [PATCH 1/5] feat(parse/md): markdown header --- .../src/generated/node_factory.rs | 174 +++++ .../src/generated/syntax_factory.rs | 198 ++++++ crates/biome_markdown_parser/src/lexer/mod.rs | 37 ++ .../biome_markdown_parser/src/lexer/tests.rs | 60 ++ .../src/generated/kind.rs | 6 + .../src/generated/macros.rs | 24 + .../src/generated/nodes.rs | 612 ++++++++++++++++++ .../src/generated/nodes_mut.rs | 120 ++++ xtask/codegen/markdown.ungram | 7 + xtask/codegen/src/markdown_kinds_src.rs | 6 + 10 files changed, 1244 insertions(+) diff --git a/crates/biome_markdown_factory/src/generated/node_factory.rs b/crates/biome_markdown_factory/src/generated/node_factory.rs index 42d59769621c..f5f9a028ea5e 100644 --- a/crates/biome_markdown_factory/src/generated/node_factory.rs +++ b/crates/biome_markdown_factory/src/generated/node_factory.rs @@ -87,6 +87,180 @@ impl MdHeaderBuilder { )) } } +pub fn md_header1(before: MdHashList, after: MdHashList) -> MdHeader1Builder { + MdHeader1Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader1Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader1Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader1 { + MdHeader1::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER1, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} +pub fn md_header2(before: MdHashList, after: MdHashList) -> MdHeader2Builder { + MdHeader2Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader2Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader2Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader2 { + MdHeader2::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER2, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} +pub fn md_header3(before: MdHashList, after: MdHashList) -> MdHeader3Builder { + MdHeader3Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader3Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader3Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader3 { + MdHeader3::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER3, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} +pub fn md_header4(before: MdHashList, after: MdHashList) -> MdHeader4Builder { + MdHeader4Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader4Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader4Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader4 { + MdHeader4::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER4, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} +pub fn md_header5(before: MdHashList, after: MdHashList) -> MdHeader5Builder { + MdHeader5Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader5Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader5Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader5 { + MdHeader5::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER5, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} +pub fn md_header6(before: MdHashList, after: MdHashList) -> MdHeader6Builder { + MdHeader6Builder { + before, + after, + md_paragraph: None, + } +} +pub struct MdHeader6Builder { + before: MdHashList, + after: MdHashList, + md_paragraph: Option, +} +impl MdHeader6Builder { + pub fn with_md_paragraph(mut self, md_paragraph: MdParagraph) -> Self { + self.md_paragraph = Some(md_paragraph); + self + } + pub fn build(self) -> MdHeader6 { + MdHeader6::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_HEADER6, + [ + Some(SyntaxElement::Node(self.before.into_syntax())), + self.md_paragraph + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Node(self.after.into_syntax())), + ], + )) + } +} pub fn md_html_block(md_textual: MdTextual) -> MdHtmlBlock { MdHtmlBlock::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_HTML_BLOCK, diff --git a/crates/biome_markdown_factory/src/generated/syntax_factory.rs b/crates/biome_markdown_factory/src/generated/syntax_factory.rs index c112b6bc4428..ca911fcef1f6 100644 --- a/crates/biome_markdown_factory/src/generated/syntax_factory.rs +++ b/crates/biome_markdown_factory/src/generated/syntax_factory.rs @@ -154,6 +154,204 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_HEADER, children) } + MD_HEADER1 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER1.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER1, children) + } + MD_HEADER2 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER2.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER2, children) + } + MD_HEADER3 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER3.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER3, children) + } + MD_HEADER4 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER4.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER4, children) + } + MD_HEADER5 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER5.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER5, children) + } + MD_HEADER6 => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdParagraph::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if let Some(element) = ¤t_element { + if MdHashList::can_cast(element.kind()) { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_HEADER6.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_HEADER6, children) + } MD_HTML_BLOCK => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 34753be9f6c4..8fc4304e9625 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -181,10 +181,31 @@ impl<'src> MarkdownLexer<'src> { match dispatched { WHS => self.consume_newline_or_whitespace(), MUL | MIN | IDT => self.consume_thematic_break_literal(), + HAS => self.consume_header(), _ => self.consume_textual(), } } + fn consume_header(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + let mut level = 0; + while matches!(self.current_byte(), Some(b'#')) { + self.advance(1); + level += 1; + } + + match level { + 1 => MD_HEADER1, + 2 => MD_HEADER2, + 3 => MD_HEADER3, + 4 => MD_HEADER4, + 5 => MD_HEADER5, + 6 => MD_HEADER6, + _ => ERROR_TOKEN, + } + } + fn text_position(&self) -> TextSize { TextSize::try_from(self.position).expect("Input to be smaller than 4 GB") } @@ -356,9 +377,25 @@ impl<'src> MarkdownLexer<'src> { fn consume_textual(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); + // Consume the first character let char = self.current_char_unchecked(); self.advance(char.len_utf8()); + // Continue consuming characters until we hit a newline or another special markdown character + // But allow spaces within text content + while let Some(byte) = self.current_byte() { + match byte { + // Stop at newlines or special Markdown syntax characters, + // but NOT spaces (removed b' ' from this list) + b'\n' | b'\r' | b'\t' | b'#' | b'*' | b'-' | b'_' => break, + _ => { + // Consume this character and continue + let next_char = self.current_char_unchecked(); + self.advance(next_char.len_utf8()); + } + } + } + MD_TEXTUAL_LITERAL } diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 056ec8c5284b..cff6f7d3343f 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -140,6 +140,66 @@ fn whitespace() { } } +#[test] +fn heading_level_1() { + assert_lex! { + "# Heading 1", + MD_HEADER1:1, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + +#[test] +fn heading_level_2() { + assert_lex! { + "## Heading 2", + MD_HEADER2:2, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + +#[test] +fn heading_level_3() { + assert_lex! { + "### Heading 3", + MD_HEADER3:3, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + +#[test] +fn heading_level_4() { + assert_lex! { + "#### Heading 4", + MD_HEADER4:4, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + +#[test] +fn heading_level_5() { + assert_lex! { + "##### Heading 5", + MD_HEADER5:5, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + +#[test] +fn heading_level_6() { + assert_lex! { + "###### Heading 6", + MD_HEADER6:6, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + } +} + #[test] fn thematic_break_literal() { assert_lex! { diff --git a/crates/biome_markdown_syntax/src/generated/kind.rs b/crates/biome_markdown_syntax/src/generated/kind.rs index 824489e68e5f..0b1ef1dde743 100644 --- a/crates/biome_markdown_syntax/src/generated/kind.rs +++ b/crates/biome_markdown_syntax/src/generated/kind.rs @@ -47,6 +47,12 @@ pub enum MarkdownSyntaxKind { MD_HASH_LIST, MD_HASH, MD_HEADER, + MD_HEADER1, + MD_HEADER2, + MD_HEADER3, + MD_HEADER4, + MD_HEADER5, + MD_HEADER6, MD_INDENT_CODE_BLOCK, MD_FENCED_CODE_BLOCK, MD_HTML_BLOCK, diff --git a/crates/biome_markdown_syntax/src/generated/macros.rs b/crates/biome_markdown_syntax/src/generated/macros.rs index fc61b8759e44..9b9887953503 100644 --- a/crates/biome_markdown_syntax/src/generated/macros.rs +++ b/crates/biome_markdown_syntax/src/generated/macros.rs @@ -40,6 +40,30 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::MdHeader::new_unchecked(node) }; $body } + $crate::MarkdownSyntaxKind::MD_HEADER1 => { + let $pattern = unsafe { $crate::MdHeader1::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_HEADER2 => { + let $pattern = unsafe { $crate::MdHeader2::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_HEADER3 => { + let $pattern = unsafe { $crate::MdHeader3::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_HEADER4 => { + let $pattern = unsafe { $crate::MdHeader4::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_HEADER5 => { + let $pattern = unsafe { $crate::MdHeader5::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_HEADER6 => { + let $pattern = unsafe { $crate::MdHeader6::new_unchecked(node) }; + $body + } $crate::MarkdownSyntaxKind::MD_HTML_BLOCK => { let $pattern = unsafe { $crate::MdHtmlBlock::new_unchecked(node) }; $body diff --git a/crates/biome_markdown_syntax/src/generated/nodes.rs b/crates/biome_markdown_syntax/src/generated/nodes.rs index 80c2ca083975..960aa97fde93 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes.rs @@ -251,6 +251,276 @@ pub struct MdHeaderFields { pub after: MdHashList, } #[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader1 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader1 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader1Fields { + MdHeader1Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader1 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader1Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader2 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader2 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader2Fields { + MdHeader2Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader2 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader2Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader3 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader3 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader3Fields { + MdHeader3Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader3 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader3Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader4 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader4 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader4Fields { + MdHeader4Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader4 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader4Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader5 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader5 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader5Fields { + MdHeader5Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader5 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader5Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdHeader6 { + pub(crate) syntax: SyntaxNode, +} +impl MdHeader6 { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdHeader6Fields { + MdHeader6Fields { + before: self.before(), + md_paragraph: self.md_paragraph(), + after: self.after(), + } + } + pub fn before(&self) -> MdHashList { + support::list(&self.syntax, 0usize) + } + pub fn md_paragraph(&self) -> Option { + support::node(&self.syntax, 1usize) + } + pub fn after(&self) -> MdHashList { + support::list(&self.syntax, 2usize) + } +} +impl Serialize for MdHeader6 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdHeader6Fields { + pub before: MdHashList, + pub md_paragraph: Option, + pub after: MdHashList, +} +#[derive(Clone, PartialEq, Eq, Hash)] pub struct MdHtmlBlock { pub(crate) syntax: SyntaxNode, } @@ -1279,6 +1549,318 @@ impl From for SyntaxElement { n.syntax.into() } } +impl AstNode for MdHeader1 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER1 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER1 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader1 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader1") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader1").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader1) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader1) -> SyntaxElement { + n.syntax.into() + } +} +impl AstNode for MdHeader2 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER2 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER2 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader2 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader2") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader2").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader2) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader2) -> SyntaxElement { + n.syntax.into() + } +} +impl AstNode for MdHeader3 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER3 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER3 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader3 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader3") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader3").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader3) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader3) -> SyntaxElement { + n.syntax.into() + } +} +impl AstNode for MdHeader4 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER4 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER4 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader4 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader4") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader4").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader4) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader4) -> SyntaxElement { + n.syntax.into() + } +} +impl AstNode for MdHeader5 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER5 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER5 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader5 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader5") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader5").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader5) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader5) -> SyntaxElement { + n.syntax.into() + } +} +impl AstNode for MdHeader6 { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER6 as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HEADER6 + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdHeader6 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHeader6") + .field("before", &self.before()) + .field( + "md_paragraph", + &support::DebugOptionalElement(self.md_paragraph()), + ) + .field("after", &self.after()) + .finish() + } else { + f.debug_struct("MdHeader6").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdHeader6) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdHeader6) -> SyntaxElement { + n.syntax.into() + } +} impl AstNode for MdHtmlBlock { type Language = Language; const KIND_SET: SyntaxKindSet = @@ -2513,6 +3095,36 @@ impl std::fmt::Display for MdHeader { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for MdHeader1 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdHeader2 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdHeader3 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdHeader4 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdHeader5 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdHeader6 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MdHtmlBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs index 00ccf95b5ac7..d998fb9d1c39 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs @@ -75,6 +75,126 @@ impl MdHeader { ) } } +impl MdHeader1 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdHeader2 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdHeader3 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdHeader4 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdHeader5 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdHeader6 { + pub fn with_before(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_md_paragraph(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 1usize..=1usize, + once(element.map(|element| element.into_syntax().into())), + )) + } + pub fn with_after(self, element: MdHashList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } +} impl MdHtmlBlock { pub fn with_md_textual(self, element: MdTextual) -> Self { Self::unwrap_cast( diff --git a/xtask/codegen/markdown.ungram b/xtask/codegen/markdown.ungram index 63f1094b4a23..e3cb7a898201 100644 --- a/xtask/codegen/markdown.ungram +++ b/xtask/codegen/markdown.ungram @@ -58,6 +58,13 @@ AnyContainerBlock = MdQuote | MdBulletListItem | MdOrderListItem // h1..h6 MdHeader = before:MdHashList MdParagraph? after:MdHashList +MdHeader1 = before:MdHashList MdParagraph? after:MdHashList +MdHeader2 = before:MdHashList MdParagraph? after:MdHashList +MdHeader3 = before:MdHashList MdParagraph? after:MdHashList +MdHeader4 = before:MdHashList MdParagraph? after:MdHashList +MdHeader5 = before:MdHashList MdParagraph? after:MdHashList +MdHeader6 = before:MdHashList MdParagraph? after:MdHashList + MdHashList = MdHash* MdHash = '#' diff --git a/xtask/codegen/src/markdown_kinds_src.rs b/xtask/codegen/src/markdown_kinds_src.rs index b0254d9fc773..821292cde704 100644 --- a/xtask/codegen/src/markdown_kinds_src.rs +++ b/xtask/codegen/src/markdown_kinds_src.rs @@ -41,6 +41,12 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_HASH_LIST", "MD_HASH", "MD_HEADER", + "MD_HEADER1", + "MD_HEADER2", + "MD_HEADER3", + "MD_HEADER4", + "MD_HEADER5", + "MD_HEADER6", "MD_INDENT_CODE_BLOCK", "MD_FENCED_CODE_BLOCK", "MD_HTML_BLOCK", From d1463f57f9f5b291b95bf26ab6bf3cd3b87373fe Mon Sep 17 00:00:00 2001 From: Augustin Mauroy <97875033+AugustinMauroy@users.noreply.github.com> Date: Wed, 26 Feb 2025 15:52:40 +0100 Subject: [PATCH 2/5] test: add more case --- crates/biome_markdown_parser/src/lexer/mod.rs | 2 -- .../biome_markdown_parser/src/lexer/tests.rs | 11 +++++++ .../tests/md_test_suite/ok/empty.md | 0 .../tests/md_test_suite/ok/empty.md.snap | 31 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md.snap diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 8fc4304e9625..a1d42742c911 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -1,5 +1,3 @@ -//! An extremely fast, lookup table based, JSON lexer which yields SyntaxKind tokens used by the rome-json parser. - #[rustfmt::skip] mod tests; diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index cff6f7d3343f..2a46fd75d917 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -150,6 +150,17 @@ fn heading_level_1() { } } +#[test] +fn heading_level_1_with_newline() { + assert_lex! { + "# Heading 1\n", + MD_HEADER1:1, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:9, + NEWLINE:1, + } +} + #[test] fn heading_level_2() { assert_lex! { diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md.snap new file mode 100644 index 000000000000..0592be92d021 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/empty.md.snap @@ -0,0 +1,31 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +snapshot_kind: text +--- +## Input + +``` + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [], + eof_token: EOF@0..0 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..0 + 0: (empty) + 1: MD_BLOCK_LIST@0..0 + 2: EOF@0..0 "" [] [] + +``` From cf4f167d3bacbf3583dbe9e670bf2d0a52caa829 Mon Sep 17 00:00:00 2001 From: Augustin Mauroy <97875033+AugustinMauroy@users.noreply.github.com> Date: Wed, 26 Feb 2025 16:06:46 +0100 Subject: [PATCH 3/5] test: add not an heading --- crates/biome_markdown_parser/src/lexer/tests.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 2a46fd75d917..79677c1af876 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -25,6 +25,7 @@ macro_rules! assert_lex { tokens.push((lexer.current(), lexer.current_range())); } + $( assert_eq!( tokens[idx].0, @@ -211,6 +212,19 @@ fn heading_level_6() { } } +#[test] +// todo: this should be a MD_TEXTUAL_LITERAL token +fn not_a_heading() { + assert_lex! { + "############## not-heading", + ERROR_TOKEN:14, + WHITESPACE:1, + MD_TEXTUAL_LITERAL:3, + ERROR_TOKEN:1, + MD_TEXTUAL_LITERAL:7, + } +} + #[test] fn thematic_break_literal() { assert_lex! { From cbfe8c219de1bdd83b8c667abccb9205b1194e05 Mon Sep 17 00:00:00 2001 From: Augustin Mauroy <97875033+AugustinMauroy@users.noreply.github.com> Date: Fri, 28 Feb 2025 19:30:35 +0100 Subject: [PATCH 4/5] update parser --- crates/biome_markdown_parser/src/syntax.rs | 49 +++++++++-- .../src/syntax/atx_headings.rs | 85 +++++++++++++++++++ .../tests/md_test_suite/ok/atx-heading.md | 6 ++ 3 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 crates/biome_markdown_parser/src/syntax/atx_headings.rs create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/atx-heading.md diff --git a/crates/biome_markdown_parser/src/syntax.rs b/crates/biome_markdown_parser/src/syntax.rs index 3ca2c0b9e10f..52efe6b517da 100644 --- a/crates/biome_markdown_parser/src/syntax.rs +++ b/crates/biome_markdown_parser/src/syntax.rs @@ -1,11 +1,13 @@ pub mod thematic_break_block; +pub mod atx_headings; use biome_markdown_syntax::{T, kind::MarkdownSyntaxKind::*}; use biome_parser::{ + prelude::{ParsedSyntax::{self, *}, TokenSource}, Parser, - prelude::ParsedSyntax::{self, *}, }; use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block}; +use atx_headings::{at_atx_heading, parse_atx_heading}; use crate::MarkdownParser; @@ -25,7 +27,9 @@ pub(crate) fn parse_block_list(p: &mut MarkdownParser) -> ParsedSyntax { } pub(crate) fn parse_any_block(p: &mut MarkdownParser) { - if at_indent_code_block(p) { + if at_atx_heading(p) { + let _ = parse_atx_heading(p); + } else if at_indent_code_block(p) { parse_indent_code_block(p); } else if at_thematic_break_block(p) { let break_block = try_parse(p, |p| { @@ -36,8 +40,10 @@ pub(crate) fn parse_any_block(p: &mut MarkdownParser) { Ok(break_block) }); if break_block.is_err() { - parse_paragraph(p); + let _ = parse_paragraph(p); } + } else { + let _ = parse_paragraph(p); } } @@ -49,8 +55,41 @@ pub(crate) fn parse_indent_code_block(_p: &mut MarkdownParser) { todo!() } -pub(crate) fn parse_paragraph(_p: &mut MarkdownParser) { - todo!() +pub(crate) fn parse_paragraph(p: &mut MarkdownParser) -> ParsedSyntax { + let m = p.start(); + + // Parse paragraph content until a blank line, EOF, or another block element + parse_paragraph_line(p); + + // Additional lines in the paragraph + while !p.at(T![EOF]) && + !is_blank_line(p) && + !at_atx_heading(p) && + !at_thematic_break_block(p) && + !at_indent_code_block(p) { + parse_paragraph_line(p); + } + + Present(m.complete(p, MD_PARAGRAPH)) +} + +// Helper to check if we're at a blank line +fn is_blank_line(p: &mut MarkdownParser) -> bool { + // A simple check for a blank line - just newline or whitespace followed by newline + p.at(NEWLINE) || (p.at(WHITESPACE) && p.nth(1) == NEWLINE) +} + +// Renamed to be clearer that this parses a single line of paragraph content +pub(crate) fn parse_paragraph_line(p: &mut MarkdownParser) { + // Parse until end of line or end of file + while !p.at(T![EOF]) && !p.at(NEWLINE) { + p.bump(p.source().current()); + } + + // Consume the newline if present + if p.at(NEWLINE) { + p.bump(NEWLINE); + } } /// Attempt to parse some input with the given parsing function. If parsing diff --git a/crates/biome_markdown_parser/src/syntax/atx_headings.rs b/crates/biome_markdown_parser/src/syntax/atx_headings.rs new file mode 100644 index 000000000000..0b61eec08a4f --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/atx_headings.rs @@ -0,0 +1,85 @@ +use crate::parser::MarkdownParser; +use biome_markdown_syntax::MarkdownSyntaxKind::*; +use biome_markdown_syntax::T; +use biome_parser::{ + prelude::ParsedSyntax::{self, *}, + Parser, + prelude::TokenSource, +}; + +pub(crate) fn at_atx_heading(p: &mut MarkdownParser) -> bool { + // ATX headings start with 1-6 hash characters + if !p.at(T![#]) { + return false; + } + + // Count consecutive hash characters (max 6) + let mut hash_count = 0; + while p.nth(hash_count) == T![#] && hash_count < 6 { + hash_count += 1; + } + + // Must be followed by whitespace or EOL to be a valid heading + let next = p.nth(hash_count); + next == WHITESPACE || next == NEWLINE || next == T![EOF] +} + +pub(crate) fn parse_atx_heading(p: &mut MarkdownParser) -> ParsedSyntax { + let m = p.start(); + + // Parse opening hash marks + let hash_list_m = p.start(); + let mut hash_count = 0; + while p.at(T![#]) && hash_count < 6 { + p.bump(T![#]); + hash_count += 1; + } + hash_list_m.complete(p, MD_HASH_LIST); + + // Skip whitespace after the hash marks + if p.at(WHITESPACE) { + p.bump(WHITESPACE); + } + + // Parse heading content as a paragraph (optional) + if !p.at(NEWLINE) && !p.at(T![EOF]) { + let paragraph_m = p.start(); + + // Parse until end of line, or until trailing hashes + while !p.at(NEWLINE) && !p.at(T![EOF]) && !p.at(T![#]) { + p.bump(p.source().current()); + } + + paragraph_m.complete(p, MD_PARAGRAPH); + } + + // Parse trailing hash marks (optional) + let trailing_hash_list_m = p.start(); + while p.at(T![#]) { + p.bump(T![#]); + } + trailing_hash_list_m.complete(p, MD_HASH_LIST); + + // Skip trailing whitespace + if p.at(WHITESPACE) { + p.bump(WHITESPACE); + } + + // Consume the newline if present + if p.at(NEWLINE) { + p.bump(NEWLINE); + } + + // Complete with the appropriate heading node type based on hash count + let node = match hash_count { + 1 => MD_HEADER1, + 2 => MD_HEADER2, + 3 => MD_HEADER3, + 4 => MD_HEADER4, + 5 => MD_HEADER5, + 6 => MD_HEADER6, + _ => MD_HEADER // Fallback, should not happen + }; + + Present(m.complete(p, node)) +} diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/atx-heading.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx-heading.md new file mode 100644 index 000000000000..4c9cb6f4102f --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx-heading.md @@ -0,0 +1,6 @@ +# Heading 1 +## Heading 2 +### Heading 3 +#### Heading 4 +##### Heading 5 +###### Heading 6 From ca0524a51c863ce63e299a9a8eb57b678667ea5c Mon Sep 17 00:00:00 2001 From: Augustin Mauroy <97875033+AugustinMauroy@users.noreply.github.com> Date: Tue, 4 Mar 2025 22:30:08 +0100 Subject: [PATCH 5/5] update --- crates/biome_markdown_parser/src/lexer/mod.rs | 19 +++----- .../biome_markdown_parser/src/lexer/tests.rs | 46 +++++++++++++++---- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index a1d42742c911..c419f700361a 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -3,6 +3,7 @@ mod tests; use biome_markdown_syntax::MarkdownSyntaxKind; use biome_markdown_syntax::MarkdownSyntaxKind::*; +use biome_markdown_syntax::T; use biome_parser::diagnostic::ParseDiagnostic; use biome_parser::lexer::{ LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags, @@ -187,21 +188,15 @@ impl<'src> MarkdownLexer<'src> { fn consume_header(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); - let mut level = 0; - while matches!(self.current_byte(), Some(b'#')) { + // Just consume a single hash character and return its token + if matches!(self.current_byte(), Some(b'#')) { self.advance(1); - level += 1; + return T![#]; } - match level { - 1 => MD_HEADER1, - 2 => MD_HEADER2, - 3 => MD_HEADER3, - 4 => MD_HEADER4, - 5 => MD_HEADER5, - 6 => MD_HEADER6, - _ => ERROR_TOKEN, - } + // This shouldn't be reached if this function is called correctly + // but handle the error case anyway + self.consume_textual() } fn text_position(&self) -> TextSize { diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 79677c1af876..acbe28dcb806 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -25,6 +25,8 @@ macro_rules! assert_lex { tokens.push((lexer.current(), lexer.current_range())); } + // TODO: remove this debug print + println!("tokens: {:#?}", tokens); $( assert_eq!( @@ -145,7 +147,7 @@ fn whitespace() { fn heading_level_1() { assert_lex! { "# Heading 1", - MD_HEADER1:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -155,7 +157,7 @@ fn heading_level_1() { fn heading_level_1_with_newline() { assert_lex! { "# Heading 1\n", - MD_HEADER1:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, NEWLINE:1, @@ -166,7 +168,8 @@ fn heading_level_1_with_newline() { fn heading_level_2() { assert_lex! { "## Heading 2", - MD_HEADER2:2, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -176,7 +179,9 @@ fn heading_level_2() { fn heading_level_3() { assert_lex! { "### Heading 3", - MD_HEADER3:3, + HASH:1, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -186,7 +191,10 @@ fn heading_level_3() { fn heading_level_4() { assert_lex! { "#### Heading 4", - MD_HEADER4:4, + HASH:1, + HASH:1, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -196,7 +204,11 @@ fn heading_level_4() { fn heading_level_5() { assert_lex! { "##### Heading 5", - MD_HEADER5:5, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -206,7 +218,12 @@ fn heading_level_5() { fn heading_level_6() { assert_lex! { "###### Heading 6", - MD_HEADER6:6, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:9, } @@ -217,7 +234,20 @@ fn heading_level_6() { fn not_a_heading() { assert_lex! { "############## not-heading", - ERROR_TOKEN:14, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, + HASH:1, WHITESPACE:1, MD_TEXTUAL_LITERAL:3, ERROR_TOKEN:1,