forked from OpenBMB/ChatDev
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkdown_parser.py
36 lines (36 loc) · 1.54 KB
/
markdown_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
'''
This file contains the MarkdownParser class responsible for parsing markdown syntax.
'''
import re
class MarkdownParser:
    '''
    Line-oriented markdown parser.

    parse() converts markdown text into a list of block dictionaries. Every
    dictionary has a "type" key ("heading", "list", "blockquote",
    "code_block" or "paragraph") and a "content" key; headings also carry
    "level" and lists also carry "ordered".
    '''

    # Minimum number of backticks that opens or closes a fenced code block.
    _MIN_FENCE = 3
    # List marker at the start of a line: "*" or "-" (unordered), or digits
    # followed by "." (ordered; the digits are captured in group 1). The
    # marker must be followed by whitespace or the end of the line so that
    # "**bold**" and "---" are not mistaken for list items.
    _LIST_MARKER = re.compile(r"(?:[*-]|(\d+)\.)(?:\s+|$)")
    # Optional closing "#" run of a heading ("## Title ##"). It only counts
    # when it is the whole text or is preceded by whitespace, so "C#" is kept.
    _CLOSING_HASHES = re.compile(r"(?:^|\s+)#+$")

    def parse(self, markdown_content):
        '''
        Parse markdown text into a list of block dictionaries.

        The text is processed line by line. Lines between a pair of code
        fences (three or more backticks) are collected verbatim into one
        "code_block" entry instead of being interpreted as markdown. Every
        other line yields exactly one entry, blank lines included (as empty
        paragraphs).

        Args:
            markdown_content: The markdown source as a single string.

        Returns:
            A list of dictionaries, one per parsed block, in document order.
        '''
        parsed_content = []
        fence_len = 0      # 0 while outside a fenced block
        fenced_lines = []  # lines collected inside the current fence
        for raw_line in markdown_content.split("\n"):
            # Tolerate Windows line endings without changing how many
            # lines split() produces.
            line = raw_line.rstrip("\r")
            if fence_len:
                marker = line.strip()
                # A closing fence is nothing but backticks, at least as
                # many as the opening fence had.
                if len(marker) >= fence_len and not marker.strip("`"):
                    parsed_content.append(
                        {"type": "code_block", "content": "\n".join(fenced_lines)}
                    )
                    fence_len = 0
                    fenced_lines = []
                else:
                    fenced_lines.append(line)
                continue
            opening = self._fence_length(line)
            if opening:
                fence_len = opening
            elif line.startswith("#"):
                parsed_content.append(self.parse_heading(line))
            elif self._LIST_MARKER.match(line):
                parsed_content.append(self.parse_list(line))
            elif line.startswith(">"):
                parsed_content.append(self.parse_blockquote(line))
            elif line.startswith("`"):
                # Single-line code such as `code`.
                parsed_content.append(self.parse_code_block(line))
            else:
                parsed_content.append(self.parse_paragraph(line))
        if fence_len:
            # Unterminated fence: keep what was collected rather than drop it.
            parsed_content.append(
                {"type": "code_block", "content": "\n".join(fenced_lines)}
            )
        return parsed_content

    @classmethod
    def _fence_length(cls, line):
        '''
        Return the length of the backtick run if the line opens a code
        fence, otherwise 0.

        A line opens a fence when it starts with at least three backticks
        and has no further backtick after that run (so a one-line snippet
        wrapped in triple backticks is not treated as an opening fence).
        '''
        run = len(line) - len(line.lstrip("`"))
        if run >= cls._MIN_FENCE and "`" not in line[run:]:
            return run
        return 0

    def parse_heading(self, line):
        '''
        Parse a heading line such as "## Title".

        The level is the number of leading "#" characters only, so "#"
        characters inside the text do not affect it. An optional closing
        run of "#" ("## Title ##") is removed from the content.

        Returns:
            {"type": "heading", "level": int, "content": str}
        '''
        level = len(line) - len(line.lstrip("#"))
        content = self._CLOSING_HASHES.sub("", line[level:].strip())
        return {"type": "heading", "level": level, "content": content}

    def parse_list(self, line):
        '''
        Parse a single list-item line such as "- item" or "3. item".

        Only the leading marker is removed; "-" or "*" characters inside
        the item text are preserved. "content" stays a list for
        compatibility with existing callers: it holds the item text, or is
        empty when the line has a marker but no text.

        Returns:
            {"type": "list", "ordered": bool, "content": list of str}
        '''
        marker = self._LIST_MARKER.match(line)
        if marker:
            ordered = marker.group(1) is not None
            text = line[marker.end():].strip()
        else:
            # Called directly with a line that has no recognisable marker.
            ordered = False
            text = line.strip()
        items = [text] if text else []
        return {"type": "list", "ordered": ordered, "content": items}

    def parse_blockquote(self, line):
        '''
        Parse a blockquote line such as "> quoted text".

        Only the leading ">" characters and surrounding whitespace are
        removed; a ">" at the end of the text is kept.

        Returns:
            {"type": "blockquote", "content": str}
        '''
        content = line.lstrip(">").strip()
        return {"type": "blockquote", "content": content}

    def parse_code_block(self, line):
        '''
        Parse a single-line code snippet by removing the backticks at both
        ends of the line.

        Returns:
            {"type": "code_block", "content": str}
        '''
        content = line.strip("`")
        return {"type": "code_block", "content": content}

    def parse_paragraph(self, line):
        '''
        Wrap a plain text line, unchanged, as a paragraph block.

        Returns:
            {"type": "paragraph", "content": str}
        '''
        return {"type": "paragraph", "content": line}