#!/usr/bin/env python3

# author: Ole Schuett

from typing import Tuple, List, Optional
import lxml.etree as ET
import lxml
from pathlib import Path
import re
import sys

SectionPath = Tuple[str, ...]


# =======================================================================================
def main() -> None:
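    """Check the command line arguments and generate the bibliography and input reference."""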
    if len(sys.argv) != 3:
        print("generate_input_reference.py <cp2k_input.xml> <references.html>")
        sys.exit(1)

    cp2k_input_xml_fn, references_html_fn = sys.argv[1:]
    output_dir = Path(__file__).resolve().parent

    build_bibliography(references_html_fn, output_dir)
    build_input_reference(cp2k_input_xml_fn, output_dir)


# =======================================================================================
def build_bibliography(references_html_fn: str, output_dir: Path) -> None:
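    """Convert the HTML references into a markdown bibliography with one anchor per citation key."""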
    content = Path(references_html_fn).read_text()
    entries = re.findall("<TR>.*?</TR>", content, re.DOTALL)

    output = []
    output += ["%", "% This file was created by generate_input_reference.py", "%"]
    output += ["# Bibliography", ""]

    for entry in entries:
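        # Each <TR> entry contains the citation key, an authors/reference block, and the title.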
        pattern = r'<TR><TD>\[(.*?)\]</TD><TD>\n <A NAME="reference_\d+">(.*?)</A><br>(.*?)</TD></TR>'
        parts = re.search(pattern, entry, re.DOTALL)
        assert parts
        key = parts.group(1)
        title = parts.group(3).strip()
        if "<br>" in parts.group(2):
            m = re.match("(.*?)<br>(.*)", parts.group(2), re.DOTALL)
            assert m
            authors, mix = m.groups()
        else:
            authors, mix = "", parts.group(2)

        if "https://doi.org" in mix:
            m = re.match(r'\s*<A HREF="(.*?)">(.*?)</A>', mix, re.DOTALL)
            assert m
            doi, ref = m.groups()
        else:
            doi, ref = "", mix.strip()

        output += [f"({key})=", f"## {key}", ""]
        if doi:
            output += [f"{authors} **{title}** _[{ref}]({doi})_", ""]
        else:
            output += [f"{authors} **{title}** _{ref}_", ""]

    # Write output
    filename = output_dir / "bibliography.md"
    filename.write_text("\n".join(output))
    print(f"Wrote {filename}")


# =======================================================================================
def build_input_reference(cp2k_input_xml_fn: str, output_dir: Path) -> None:
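    """Render all input sections to markdown files and write the landing page CP2K_INPUT.md."""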
    tree = ET.parse(cp2k_input_xml_fn)
    root = tree.getroot()
    num_files_written = process_section(root, ("CP2K_INPUT",), output_dir)

    # Build landing page.
    cp2k_version = get_text(root.find("CP2K_VERSION"))
    compile_revision = get_text(root.find("COMPILE_REVISION"))
    # cp2k_year = get_text(root.find("CP2K_YEAR"))
    # compile_date = get_text(root.find("COMPILE_DATE"))

    output = []
    output += ["%", "% This file was created by generate_input_reference.py", "%"]
    output += ["# Input reference", ""]

    assert compile_revision.startswith("git:")
    github_url = f"https://github.com/cp2k/cp2k/tree/{compile_revision[4:]}"
    output += [f"Based on {cp2k_version} ([{compile_revision}]({github_url}))", ""]

    output += ["```{toctree}"]
    output += [":maxdepth: 1"]
    output += [":titlesonly:"]
    output += [":caption: Top-level sections"]
    output += [":glob:", ""]
    output += ["CP2K_INPUT/*", ""]
    output += ["```", ""]

    # Write output
    filename = output_dir / "CP2K_INPUT.md"  # Overwrite generic file.
    filename.write_text("\n".join(output))
    print(f"Wrote markdown files for {num_files_written} input sections.")


# =======================================================================================
def process_section(
    section: lxml.etree._Element, section_path: SectionPath, output_dir: Path
) -> int:
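    """Render one input section as a markdown file and recurse into its subsections.

    Returns the number of markdown files written, including those of the subsections.
    """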
    # Find section fields.
    repeats = "repeats" in section.attrib and section.attrib["repeats"] == "yes"
    description = get_text(section.find("DESCRIPTION"))
    location = get_text(section.find("LOCATION"))
    section_name = section_path[-1]  # section.find("NAME") doesn't work for root

    # Find section references.
    references = [get_text(ref.find("NAME")) for ref in section.findall("REFERENCE")]

    output = []
    output += ["%", "% This file was created by generate_input_reference.py", "%"]
    output += [f"# {section_name}", ""]
    if repeats:
        output += ["**Section can be repeated.**", ""]
    if references:
        citations = ", ".join([f"{{ref}}`{r}`" for r in references])
        output += [
            f"**References:** {citations}",
            "",
        ]
    output += [f"{escape_markdown(description)} {github_link(location)}", ""]

    # Render TOC
    if section.findall("SECTION"):
        output += ["```{toctree}"]
        output += [":maxdepth: 1"]
        output += [":titlesonly:"]
        output += [":caption: Subsections"]
        output += [":glob:", ""]
        output += [f"{section_name}/*"]  # TODO maybe list subsections explicitly.
        output += ["```", ""]

    # Render keywords
    keywords = (
        section.findall("SECTION_PARAMETERS")
        + section.findall("DEFAULT_KEYWORD")
        + section.findall("KEYWORD")
    )
    if keywords:
        output += ["## Keywords", ""]
        for keyword in keywords:
            output += render_keyword(keyword, section_path)

    # Write output
    section_dir = output_dir / "/".join(section_path[:-1])
    section_dir.mkdir(exist_ok=True)
    filename = section_dir / f"{section_name}.md"
    filename.write_text("\n".join(output))
    num_files_written = 1

    # Process subsections
    for subsection in section.findall("SECTION"):
        subsection_name_element = subsection.find("NAME")
        subsection_name = get_text(subsection_name_element)
        subsection_path = (*section_path, subsection_name)
        num_files_written += process_section(subsection, subsection_path, output_dir)

    return num_files_written


# =======================================================================================
def render_keyword(
    keyword: lxml.etree._Element, section_path: SectionPath
) -> List[str]:
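    """Render one keyword as a MyST py:data directive, returned as a list of markdown lines."""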
    # Find keyword names.
    keyword_names: List[str]
    if keyword.tag == "SECTION_PARAMETERS":
        keyword_names = ["SECTION_PARAMETERS"]
    elif keyword.tag == "DEFAULT_KEYWORD":
        keyword_names = ["DEFAULT_KEYWORD"]
    else:
        keyword_names = [get_text(name) for name in keyword.findall("NAME")]
    assert keyword_names

    # Find more keyword fields.
    default_value = get_text(keyword.find("DEFAULT_VALUE"))
    usage = get_text(keyword.find("USAGE"))
    description = get_text(keyword.find("DESCRIPTION"))
    location = get_text(keyword.find("LOCATION"))
    lone_keyword_value = get_text(keyword.find("LONE_KEYWORD_VALUE"))

    # Find keyword data type.
    data_type_element = keyword.find("DATA_TYPE")
    assert data_type_element is not None
    data_type = data_type_element.attrib["kind"]
    if data_type == "word":
        data_type = "string"
    if data_type == "keyword":
        data_type = "enum"

    # Need to distinguish between multiple values (n_var) and repeating keyword.
    repeats = keyword.attrib["repeats"] == "yes"
    n_var = int(get_text(data_type_element.find("N_VAR")))

    # Find keyword references.
    references = [get_text(ref.find("NAME")) for ref in keyword.findall("REFERENCE")]

    # Skip removed keywords.
    if keyword.attrib.get("removed", "no") == "yes":
        print(f"Skipping removed keyword: {keyword_names[0]}")
        return []

    # To get references to work we'd have to encode the `section_path` as `:module:`.
    # We could then also set `add_module_names = False` in the config and re-enable
    # the warnings for the sphinx.domains.python module.
    # However, the links would not be backwards compatible. A solution might be
    # a combination of explicit targets and myst_heading_slug_func in the config.
    output: List[str] = []
    output += [f"```{{py:data}} {keyword_names[0]}"]
    n_var_brackets = f"[{n_var}]" if n_var > 1 else ""
    output += [f":type: {data_type}{n_var_brackets}"]
    if default_value:
        output += [f":value: {default_value}"]
    output += [""]
    if len(keyword_names) > 1:
        aliases = ", ".join(keyword_names)
        output += [f"**Aliases:** {aliases}", ""]
    if repeats:
        output += ["**Keyword can be repeated.**", ""]
    if lone_keyword_value:
        output += [f"**Lone keyword:** `{escape_markdown(lone_keyword_value)}`", ""]
    if usage:
        output += [
            f"**Usage:** _{escape_markdown(usage)}_",
            "",
        ]
    if data_type == "enum":
        output += ["**Valid values:**"]
        for item in keyword.findall("DATA_TYPE/ENUMERATION/ITEM"):
            item_name = get_text(item.find("NAME"))
            item_description = get_text(item.find("DESCRIPTION"))
            output += [f"* `{item_name}` {escape_markdown(item_description)}"]
        output += [""]
    if references:
        citations = ", ".join([f"{{ref}}`{r}`" for r in references])
        output += [
            f"**References:** {citations}",
            "",
        ]
    output += [f"{escape_markdown(description)} {github_link(location)}", ""]

    output += ["```", ""]  # Close py:data directive.

    return output


# =======================================================================================
def get_text(element: Optional[lxml.etree._Element]) -> str:
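    """Return the element's text, or an empty string if the element or its text is missing."""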
    if element is not None:
        if element.text is not None:
            return element.text
    return ""


# =======================================================================================
def escape_markdown(text: str) -> str:
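    """Escape double underscores so markdown does not interpret them as emphasis markers."""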
    text = text.replace("__", r"\_\_")
    return text


# =======================================================================================
def github_link(location: str) -> str:
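    """Convert a "file:line" location into an [Edit on GitHub] link into the CP2K sources."""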
    if not location:
        return ""
    location_url = location.replace(":", "#L")
    github_url = f"https://github.com/cp2k/cp2k/blob/master/src/{location_url}"
    return f"<small>[[Edit on GitHub]({github_url})]</small>"


# =======================================================================================

if __name__ == "__main__":
    main()

# EOF