-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathgen-view.py
266 lines (209 loc) · 8.59 KB
/
gen-view.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
import os
import re
import shutil
from typing import List

import yaml
# Titles of the course chapters.
CHAPTERS = [
    "Hardware Software Interface",
    "Intro Computer Architecture",
    "Memory Layout",
    "Memory Security",
]

# Root of the source tree that the group_* functions walk.
CHAPTERS_PATH = "chapters/"

# Output directory and the sub-directories the sources are grouped into.
viewDir = ".view"
readingDir = f"{viewDir}/reading"
tasksDir = f"{viewDir}/tasks"
guidesDir = f"{viewDir}/guides"
mediaDir = f"{viewDir}/media"
def hyphenate(text: str) -> str:
    """
    Return the text trimmed, lowercased and with spaces turned into hyphens.

    Every single space becomes one hyphen, so consecutive spaces produce
    consecutive hyphens. Other whitespace inside the text is left untouched.
    """
    trimmed = text.strip()
    return trimmed.replace(" ", "-").lower()
def prepare_landing_page():
    """
    Put the landing page in the .view directory.

    Copies chapters/landing-page/README.md to .view/landing-page.md. The copy
    is done in-process, so it has finished when the function returns and does
    not depend on a shell or on the paths being free of special characters.
    A missing landing page is reported and otherwise ignored.
    """
    source = os.path.join(CHAPTERS_PATH, "landing-page/README.md")
    destination = f"{viewDir}/landing-page.md"

    # This is the first step run by main(), so .view may not exist yet.
    os.makedirs(viewDir, exist_ok=True)
    try:
        shutil.copy(source, destination)
    except FileNotFoundError:
        # Stay non-fatal, like the shell copy this replaces.
        print(f"Error: Could not find the landing page at {source}")
def group_reading():
    """
    Group all the reading files in a single directory.

    Every markdown file found under a directory whose path contains "reading"
    is copied into .view/reading/ under its own name; files sharing a name
    overwrite each other. Copies are done in-process, so they are complete
    when the function returns.

    Raises:
        OSError: if a file cannot be copied.
    """
    os.makedirs(readingDir, exist_ok=True)

    for root, _, files in os.walk(CHAPTERS_PATH):
        if "reading" not in root:
            continue
        for name in files:
            if name.endswith(".md"):
                shutil.copy(os.path.join(root, name), readingDir)
def group_media():
    """
    Group all the media files in a single directory.

    Every file under the chapters tree whose name ends in one of the known
    image extensions (case-sensitive) is copied into .view/media/ under its
    own name; files sharing a name overwrite each other. Copies are done
    in-process, so they are complete when the function returns.

    Raises:
        OSError: if a file cannot be copied.
    """
    os.makedirs(mediaDir, exist_ok=True)
    formats = (".jpg", ".jpeg", ".png", ".gif", ".svg")

    for root, _, files in os.walk(CHAPTERS_PATH):
        for name in files:
            if name.endswith(formats):
                shutil.copy(os.path.join(root, name), mediaDir)
def group_tasks():
    """
    Group all the tasks README.md files in a single directory.

    Each README.md found under a directory whose path contains "tasks" is
    copied to .view/tasks/<directory name>.md. Copies are done in-process, so
    they are complete when the function returns.

    Raises:
        OSError: if a file cannot be copied.
    """
    os.makedirs(tasksDir, exist_ok=True)

    for root, _, files in os.walk(CHAPTERS_PATH):
        if "tasks" not in root or "README.md" not in files:
            continue
        # The task is named after the directory holding its README.md
        taskName = os.path.basename(root)
        shutil.copy(
            os.path.join(root, "README.md"),
            os.path.join(tasksDir, f"{taskName}.md"),
        )
def group_guides():
    """
    Group all the guides README.md files in a single directory.

    Each README.md found under a directory whose path contains "guides" is
    copied to .view/guides/<directory name>.md. Copies are done in-process,
    so they are complete when the function returns.

    Raises:
        OSError: if a file cannot be copied.
    """
    os.makedirs(guidesDir, exist_ok=True)

    for root, _, files in os.walk(CHAPTERS_PATH):
        if "guides" not in root or "README.md" not in files:
            continue
        # The guide is named after the directory holding its README.md
        guideName = os.path.basename(root)
        shutil.copy(
            os.path.join(root, "README.md"),
            os.path.join(guidesDir, f"{guideName}.md"),
        )
def solve_links(filename: str, fileToLab: dict) -> None:
    """
    Make relative links work in the final markdown file.

    * Links to media and tasks are solved by referencing the .view directory.
    * Links to questions are solved by referencing the Questions section in the same chapter.
    * Links to other reading files are solved by determining the lab number and the subchapter.
      The lab number is determined by the fileToLab dictionary, and the subchapter is the first line of the file.
      For example, [text](../reading/basic-syscall.md) will become [text](.view/lab1#basic-syscall).

    A link whose target cannot be read, or whose target is not listed in
    fileToLab, is reported on stdout and left as it is.

    Args:
        filename: path of the markdown file to rewrite in place.
        fileToLab: maps a source file (relative to the .view directory) to
            the lab file that contains it.
    """
    with open(filename) as f:
        text = f.read()

    # Questions from the same chapter are at Questions/<question>, without the .md extension
    text = re.sub(r"(\[.*?\])\(.*?questions/(.*?)\.md\)", r"\1(Questions/\2)", text)

    # Remove relative links to reading, media, tasks, and guides
    for section in ["reading", "media", "tasks", "guides"]:
        text = re.sub(
            r"(\[.*?\])\([^\)]*" + section + r"/(.*?)\)", rf"\1({section}/\2)", text
        )

    # Reading links [text](.*/reading/<file>.md) should be replaced with [text](.view/labQ#<chapter>)
    # Where Q is the lab number and chapter is the heading of the file
    #
    # Duplicates are kept on purpose: the greedy `.*` in the substitution
    # below rewrites only the last occurrence on a line per pass, so a link
    # repeated on one line needs one pass per occurrence.
    matches = re.findall(r"\[[^\]]*\]\(([^\)]+\.md)\)", text)
    for sourceFile in matches:
        if sourceFile.startswith("http"):  # Skip external links to markdown files
            continue

        origName = sourceFile  # Save the original name for the regex
        if sourceFile.endswith("README.md"):
            sourceFile = os.path.dirname(sourceFile) + ".md"

        filepath = os.path.join(viewDir, sourceFile)

        # Tasks and guides are prefixed with the section name
        # FIXME: Refactor this.
        prefix = ""
        if "tasks/" in sourceFile:
            prefix = "task-"
        elif "guides/" in sourceFile:
            prefix = "guide-"

        # Get the first line of the file to extract the chapter in hyphenated format
        try:
            with open(filepath) as f:
                title = f.readline().strip("#").replace("`", "").replace(":", "")
        except (OSError, UnicodeDecodeError):
            print(f"Error: Could not solve link to {filepath} for {filename}")
            continue
        subchapter = prefix + hyphenate(title)

        # A target that exists but is not part of any lab must not abort the run
        lab = fileToLab.get(sourceFile)
        if lab is None:
            print(f"Error: {sourceFile} is not part of any lab, link in {filename} not solved")
            continue

        target = f"{lab}#{subchapter}"
        text = re.sub(
            # Use origName because tasks 'sourceFile' has changed; escape it so
            # that '.' and other regex metacharacters match literally.
            rf"(\[.*\])\({re.escape(origName)}\)",
            # A callable keeps backslashes in the target from being read as escapes.
            lambda match, target=target: f"{match.group(1)}({target})",
            text,
        )

    with open(filename, "w") as f:
        f.write(text)
def find_broken_links():
    """
    Report links in the .view markdown files that look broken.

    A link is reported when its target starts with none of the known
    prefixes. Files with "lab" in their name are not checked.
    """
    known_prefixes = ("lab", "media", "tasks", "reading", "guides", "http")

    for dirpath, _, filenames in os.walk(viewDir):
        for name in filenames:
            # Skip lab files, check source files only
            if "lab" in name or not name.endswith(".md"):
                continue

            with open(os.path.join(dirpath, name)) as handle:
                content = handle.read()

            # Collect the target of every markdown link in the file
            for target in re.findall(r"\[[^\]]*\]\(([^\)]+)\)", content):
                if not target.startswith(known_prefixes):
                    print(f"Possibly broken link in {handle.name}: ({target})")
class Lab:
    """
    A lab page assembled from several source files of the .view directory.

    Creating an instance builds the text (a level-1 title followed by the
    content of every source file) and writes it to .view/<filename>.
    """

    # Opening or closing marker of a fenced code block. A backtick fence
    # cannot have another backtick later on the same line.
    _FENCE = re.compile(r"[ \t]*(`{3,}(?=[^`]*$)|~{3,})")

    def __init__(self, title: str, filename: str, content: List[str]):
        """
        Build the lab and write it to disk.

        Args:
            title: heading placed at the top of the lab.
            filename: name of the generated file inside the .view directory.
            content: source files, relative to the .view directory, in the
                order they appear in the lab.
        """
        self.text = f"# {title}\n\n"
        for file in content:
            self.process_file(file)

        print(f"Generating lab {viewDir}/{filename}")
        with open(f"{viewDir}/{filename}", "w") as f:
            f.write(self.text)

    def process_file(self, filename: str):
        """
        Process a file and add it to the lab text.

        Reading files are added as they are. For tasks and guides the first
        line "# Some title" becomes "# Task: Some title" or
        "# Guide: Some title". Any other file, and an empty file, is added
        without renaming. All headings are then pushed one level down.
        """
        with open(os.path.join(viewDir, filename)) as f:
            lines = f.readlines()

        prefix = None
        if "reading/" not in filename:
            if "tasks/" in filename:
                prefix = "Task"
            elif "guides/" in filename:
                prefix = "Guide"

        if prefix and lines:
            lines[0] = f"# {prefix}:{lines[0].strip('#')}\n"

        self.text += self._bump_headings(lines) + "\n\n"

    @staticmethod
    def _bump_headings(lines: List[str]) -> str:
        """
        Join the lines, adding one more '#' to each heading.

        Lines inside fenced code blocks are left untouched, so that shell
        comments and C preprocessor directives such as "#include" are not
        mistaken for headings.
        """
        bumped = []
        fence = None  # Marker of the code fence currently open, if any
        for line in lines:
            marker = Lab._FENCE.match(line)
            if fence is None:
                if marker:
                    fence = marker.group(1)
                elif line.startswith("#"):
                    line = "#" + line
            elif (
                marker
                and marker.group(1)[0] == fence[0]
                and len(marker.group(1)) >= len(fence)
            ):
                # Closed by the same character, at least as long as the opener
                fence = None
            bumped.append(line)
        return "".join(bumped)
class ConfigParser:
    """Holds the parsed YAML configuration and the lab structure it lists."""

    def __init__(self, path):
        """Load the YAML file found at path."""
        self.fileToLab = None
        with open(path) as config_file:
            self.data = yaml.safe_load(config_file)

    def create_labs(self):
        """Generate one Lab per entry of the "lab_structure" list."""
        for spec in self.data["lab_structure"]:
            Lab(*(spec[key] for key in ("title", "filename", "content")))
        print()  # Add a newline for better readability

    def get_file_to_lab_dict(self) -> dict:
        """
        Return the mapping from each source file to the lab that contains it.

        The mapping is used to turn relative links between source files into
        links to the corresponding lab. It is built on the first call and
        reused afterwards as long as it is not empty.
        """
        if not self.fileToLab:
            self.fileToLab = mapping = {}
            for lab in self.data["lab_structure"]:
                mapping.update(
                    (source, f"{lab['filename']}") for source in lab["content"]
                )
        return self.fileToLab
def main():
    """
    Generate the .view directory.

    1. Copy the landing page and group the reading, media, tasks, and guides
       files in their respective folders in .view.
    2. For each lab listed in config.yaml, concatenate its content in a
       single markdown file.
    3. Solve the relative links of every markdown file found in .view.
    4. Report the links that still look broken.
    """
    # Prepare directories layout
    for prepare in (
        prepare_landing_page,
        group_reading,
        group_media,
        group_tasks,
        group_guides,
    ):
        prepare()

    # Parse the config file
    config = ConfigParser("config.yaml")
    config.create_labs()

    # Solve links recursively in all markdown files
    for dirpath, _, filenames in os.walk(viewDir):
        for name in (n for n in filenames if n.endswith(".md")):
            solve_links(os.path.join(dirpath, name), config.get_file_to_lab_dict())

    # Check for broken links
    find_broken_links()


if __name__ == "__main__":
    main()