-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocess.py
94 lines (83 loc) · 5.07 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from datetime import datetime # get timestamp for filename
from pylatex import Document, NoEscape, escape_latex, Package, Command
from re import sub
from pylatex.base_classes.command import UnsafeCommand
def make_dict(text: str) -> dict:
    """Placeholder for parsing the comments section of a pad export.

    The argument is currently ignored; a new empty dictionary is returned
    on every call.
    """
    return {}
def handle_comments(text: str, comments: bool) -> str:
    """Either keep or strip the comment markup of an exported pad.

    Args:
        text: Pad body as HTML-like markup.
        comments: Whether comments should be carried over.

    Returns:
        ``text`` unchanged when ``comments`` is true and no comments
        section is present; otherwise ``text`` with the comment markup
        removed (``comments`` false).

    Raises:
        NotImplementedError: When ``comments`` is true and the text
            contains a comments section -- rendering comments is not
            implemented yet.
    """
    section_marker = "<div id=\"comments\">"

    if not comments:
        # Drop, in this order: the opening comment spans, the "*" anchor
        # together with the closing span, and the comments section itself.
        # The last pattern stops at the first newline because "." does not
        # match line breaks without DOTALL.
        strip_patterns = (
            "<span class=\"comment [^\"]*\">",
            "<sup><a href=[^*]*\\*</a></sup></span>",
            "<div id=\"comments\">.*",
        )
        for pattern in strip_patterns:
            text = sub(pattern, "", text)
        return text

    if section_marker in text:
        # Separate the pad body from the trailing comments section.
        text, comment_section = text.split(section_marker, 1)
        make_dict(comment_section)
        raise NotImplementedError
    return text
def _apply_replacements(text: str, pairs) -> str:
    """Apply each ``(old, new)`` literal replacement to ``text`` in order."""
    for old, new in pairs:
        text = text.replace(old, new)
    return text


def saveTex(text: str, style: int, numbers: bool, contenttable: bool, comments: bool) -> str:
    """Convert the HTML export of a pad into a LaTeX document body.

    Args:
        text: Complete HTML export; only the part between ``<body>\\n`` and
            ``</body>`` is converted.
        style: Currently unused by this function.
        numbers: Emit numbered ``\\section``-style headings when true,
            starred (unnumbered) variants otherwise.
        contenttable: Append a table of contents after each title when true.
        comments: Forwarded to ``handle_comments``.

    Returns:
        The LaTeX source for the document body.

    Raises:
        ValueError: If the ``<body>\\n`` or ``</body>`` marker is missing.
        NotImplementedError: Propagated from ``handle_comments`` when
            comments are requested and a comments section is present.
    """
    # Preprocessing text: keep what lies between the first "<body>\n" and the
    # last "</body>". partition/rpartition tolerate repeated markers, where a
    # plain two-way unpack of split() would fail with an opaque error.
    _, body_open, text = text.partition("<body>\n")
    if not body_open:
        raise ValueError("missing '<body>\\n' marker in pad export")
    text, body_close, _ = text.rpartition("</body>")
    if not body_close:
        raise ValueError("missing '</body>' marker in pad export")

    # escape
    text = escape_latex(text)
    text = handle_comments(text, comments)

    # Building blocks shared by both replacement passes.
    star = "" if numbers else "*"
    title_open = "\\date{}\n\\title{"
    title_close = "}\n\\maketitle"
    if contenttable:
        # FIXME: Multiple tableofcontents (one is emitted per <h1>)
        title_close += "\n\\renewcommand*\\contentsname{Inhalt}\n\\tableofcontents"
    headings = (
        ("h2", "\\section" + star + "{"),
        ("h3", "\\subsection" + star + "{"),
        ("h4", "\\subsubsection" + star + "{"),
    )

    # rewrite to latex: line breaks and inline formatting
    inline = [
        ("<br>", "\\\\\n"),
        ("<br/>", "\\\\\n"),
        ("<strong>", "\\textbf{"), ("</strong>", "}"),
        ("<em>", "\\textit{"), ("</em>", "}"),
        ("<u>", "\\underline{"), ("</u>", "}"),
        ("<s>", "\\sout{"), ("</s>", "}"),
    ]

    # First pass: block tags together with an adjacent "\\" line break, so
    # the forced line break next to the tag is swallowed.
    # NOTE(review): with a table of contents the <h1> opener is matched
    # without the leading "\\", unlike the other branch -- kept as is,
    # confirm whether that asymmetry is intended.
    adjacent = [
        ("\\\\\n<code>", "\\begin{lstlisting}\n"),
        ("</code>\\\\", "\n\\end{lstlisting}"),
        ("<h1>" if contenttable else "\\\\<h1>", title_open),
        ("</h1>\\\\", title_close),
    ]
    for tag, command in headings:
        adjacent.append(("\\\\<" + tag + ">", command))
        adjacent.append(("</" + tag + ">\\\\", "}"))
    adjacent += [
        ("\\\\<ol start=\"1\" class=\"number\">", "\\begin{enumerate}"),
        ("</ol>\\\\", "\\end{enumerate}"),
        ("\\\\<ul class=\"bullet\">", "\\begin{itemize}"),
        ("\\\\<ul class=\"indent\">", "\\begin{itemize}"),
        ("</ul>\\\\", "\n\\end{itemize}"),
        ("<li>", "\n\\item "),
        ("</li>", ""),
    ]

    # Fallback replacement without \\ or \n required
    fallback = [
        ("<code>", "\\begin{lstlisting}\n"),
        ("</code>", "\n\\end{lstlisting}"),
        ("<h1>", title_open),
        ("</h1>", title_close),
    ]
    for tag, command in headings:
        fallback.append(("<" + tag + ">", command))
        fallback.append(("</" + tag + ">", "}"))
    fallback += [
        ("<ol start=\"1\" class=\"number\">", "\\begin{enumerate}"),
        ("</ol>", "\\end{enumerate}"),
        ("<ul class=\"bullet\">", "\\begin{itemize}"),
        ("<ul class=\"indent\">", "\\begin{itemize}"),
        ("</ul>", "\n\\end{itemize}"),
        ("<li>", "\n\\item "),
        ("</li>", ""),
    ]

    # Order matters: inline markup creates the "\\" sequences the first pass
    # looks for, and the fallback only sees what the first pass left behind.
    for stage in (inline, adjacent, fallback):
        text = _apply_replacements(text, stage)
    return text
def getFileName(workingdir, title) -> str:
    """Build the file name (without extension) for a downloaded pad dump.

    The result is ``<workingdir>/<title>-<timestamp>``. The timestamp is the
    current local time as year-month-day-hour-minute-second, with the fields
    not zero-padded; it keeps a new dump from overwriting an existing one.
    """
    now = datetime.now()
    fields = (now.year, now.month, now.day, now.hour, now.minute, now.second)
    stamp = "-".join(str(field) for field in fields)
    # join() only accepts strings, just like the "+" concatenation it mirrors
    return "".join((workingdir, "/", title, "-", stamp))
def TexToPDF(filename: str, tex: str) -> None:
    """Compile a LaTeX body into a PDF using pylatex and lualatex.

    Args:
        filename: Passed to ``generate_pdf`` as ``filepath``.
        tex: LaTeX source for the document body; appended verbatim
            (wrapped in ``NoEscape``).

    The generated ``.tex`` file is kept (``clean_tex=False``).
    """
    doc = Document()
    # Register the packages the document is compiled with, in this order.
    for package_name in ('listings', 'ulem', 'emoji'):
        doc.packages.append(Package(package_name))
    # FIXME: Display emojis
    #doc.append(Command("directlua", """luaotfload.add_fallback ("emojifallback", { "Noto Emoji:mode=harf;" })"""))
    #doc.append(Command("setmainfont", "texgyretermes-regular", """ BoldFont = texgyretermes-bold, ItalicFont = texgyretermes-italic, BoldItalicFont = texgyretermes-bolditalic, RawFeature={fallback=emojifallback} """))
    body = NoEscape(tex)
    doc.append(body)
    # print(doc)
    doc.generate_pdf(filepath=filename, clean_tex=False, compiler="lualatex")