Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update keyword linking script to handle P603 paragraphs #452

Merged
merged 4 commits into from
Jan 7, 2025
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 76 additions & 3 deletions scripts/python/src/fodt/keyword_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import xml.sax.xmlreader
import xml.sax.saxutils

from dataclasses import dataclass
from pathlib import Path

import click
Expand All @@ -20,6 +21,26 @@ class FileType(enum.Enum):
CHAPTER = 1
SUBSECTION = 2

@dataclass
class MonoParagraphStyle:
    """Bookkeeping for one candidate ``style:style`` element.

    The SAX handler fills in one instance while it walks the children of a
    paragraph style definition. Each flag records that a particular child
    element or attribute has been observed; the style is only treated as a
    monospaced paragraph style when all of the flags are set.
    """

    # Name of the style under inspection; empty while no candidate is open.
    style_name: str = ""
    # A loext:graphic-properties child element has been seen.
    loext_graphic_properties: bool = False
    # A style:paragraph-properties child element has been seen.
    style_paragraph_properties: bool = False
    # A style:text-properties child element has been seen.
    style_text_properties: bool = False
    # The text properties select the expected monospaced font.
    libre_mono_font: bool = False
    # The handler has accepted the fo:font-size of the text properties.
    libre_mono_font_size: bool = False

    def in_style_style_element(self) -> bool:
        """Return True once a style name has been recorded for this record."""
        is_unnamed = self.style_name == ""
        return not is_unnamed

    def valid(self) -> bool:
        """Return True only when every required child/attribute was observed."""
        required_flags = (
            self.loext_graphic_properties,
            self.style_paragraph_properties,
            self.style_text_properties,
            self.libre_mono_font,
            self.libre_mono_font_size,
        )
        return all(required_flags)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hakonhagland: Does that mean the font size must be set to exactly 8pt for the MonoParagraphStyle to be valid? What about other font sizes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, the font size might be changed in the future. I suggest we allow a MonoParagraphStyle to have any font size.



class FileHandler(xml.sax.handler.ContentHandler):
def __init__(
self,
Expand All @@ -35,6 +56,10 @@ def __init__(
# end /> tag instead of the full end tag </tag>
self.start_tag_open = False
self.in_p = False
# Paragraphs whose style uses monospaced text should not be linked
self.mono_paragraph_style = MonoParagraphStyle()
self.in_mono_paragraph = False # Inside a paragraph with monospaced text
self.mono_paragraph_styles = set() # Style names that use monospaced text
self.is_example_p = [] # Stack of boolean values: If current p tag is an example
self.p_recursion = 0 # We can have nested p tags
self.in_a = False
Expand All @@ -56,6 +81,12 @@ def __init__(
# A temporary character buffer to store content between start and end tags
self.char_buf = ""

def check_mono_paragraph(self, attrs: xml.sax.xmlreader.AttributesImpl) -> None:
    """Mark the current paragraph as monospaced if its style is a known one.

    Looks up the ``text:style-name`` attribute of the element and, when the
    name is in ``self.mono_paragraph_styles``, sets
    ``self.in_mono_paragraph`` to True. The flag is never cleared here.
    """
    style_attr = "text:style-name"
    if style_attr not in attrs.getNames():
        # Element carries no style reference; nothing to decide.
        return
    if attrs.getValue(style_attr) in self.mono_paragraph_styles:
        self.in_mono_paragraph = True

def compile_regex(self) -> re.Pattern:
# Also include the keyword name itself in the regex pattern, see discussion
# https://github.com/OPM/opm-reference-manual/pull/410
Expand Down Expand Up @@ -91,7 +122,7 @@ def characters(self, content: str):
# and <text:span> tags.
self.char_buf += content

def collect_style(self, attrs: xml.sax.xmlreader.AttributesImpl) -> None:
def collect_example_style(self, attrs: xml.sax.xmlreader.AttributesImpl) -> None:
# Collect the paragraph styles that use fixed width fonts
if "style:name" in attrs.getNames():
style_name = attrs.getValue("style:name")
Expand All @@ -108,6 +139,7 @@ def endElement(self, name: str):
if self.p_recursion == 0:
self.in_p = False
self.is_example_p.pop()
self.in_mono_paragraph = False # Assume this is not recursive
elif name == "text:a":
self.in_a = False
elif name == "text:span":
Expand All @@ -120,6 +152,9 @@ def endElement(self, name: str):
self.in_draw_recursion -= 1
if self.in_draw_recursion == 0:
self.in_draw_frame = False
else: # office:body not found yet
if name == "style:style":
self.maybe_add_mono_paragraph_style()
if self.start_tag_open:
self.content.write("/>")
self.start_tag_open = False
Expand All @@ -137,6 +172,41 @@ def is_table_caption(self, content: str) -> bool:
keyword_name = self.file_info
return re.search(rf'{re.escape(keyword_name)} Keyword Description', content)

def maybe_add_mono_paragraph_style(self) -> None:
    """Finish the style record being collected and start a fresh one.

    Called when a ``style:style`` element closes. If the collected record
    is valid, its style name is added to ``self.mono_paragraph_styles``.
    The record is replaced by an empty one whether or not it was valid.
    """
    finished = self.mono_paragraph_style
    if finished.valid():
        self.mono_paragraph_styles.add(finished.style_name)
    # Reset unconditionally so state never leaks into the next style element.
    self.mono_paragraph_style = MonoParagraphStyle()

def maybe_collect_mono_paragraph_style(
    self, name: str, attrs: xml.sax.xmlreader.AttributesImpl
) -> None:
    """Record evidence that the current style is a monospaced paragraph style.

    Intended to be called for every start tag seen in the style section of
    the document. Two kinds of elements are of interest:

    * ``style:style`` with parent style ``Text_20_body`` and family
      ``paragraph``: its ``style:name`` is stored as the candidate style.
    * Children of such a candidate (``loext:graphic-properties``,
      ``style:paragraph-properties``, ``style:text-properties``): each one
      sets the matching flag on ``self.mono_paragraph_style``.

    The font must be "Liberation Mono", but any ``fo:font-size`` value is
    accepted so that the detection keeps working if the size of the
    monospaced paragraphs is changed later.

    :param name: Qualified name of the element whose start tag was read.
    :param attrs: Attributes of that element.
    """
    style = self.mono_paragraph_style

    if name == "style:style":
        is_body_paragraph_style = (
            attrs.get("style:parent-style-name") == "Text_20_body"
            and attrs.get("style:family") == "paragraph"
        )
        if is_body_paragraph_style:
            style_name = attrs.get("style:name")
            if style_name is not None:
                style.style_name = style_name
        return

    if not style.in_style_style_element():
        # Not inside a candidate style definition; ignore the element.
        return

    if name == "loext:graphic-properties":
        style.loext_graphic_properties = True
    elif name == "style:paragraph-properties":
        style.style_paragraph_properties = True
    elif name == "style:text-properties":
        style.style_text_properties = True
        if attrs.get("style:font-name") == "Liberation Mono":
            style.libre_mono_font = True
        # Only require that a font size is given; its value may vary.
        if attrs.get("fo:font-size") is not None:
            style.libre_mono_font_size = True

def maybe_write_characters(self) -> None:
if len(self.char_buf) > 0:
# NOTE: We need to escape the content before we apply the regex pattern
Expand All @@ -149,6 +219,7 @@ def maybe_write_characters(self) -> None:
and (not self.in_math)
and (not self.in_binary_data)
and (not self.in_draw_frame)
and (not self.in_mono_paragraph)
):
if not self.is_example_p[-1]:
if (self.file_type == FileType.CHAPTER or
Expand Down Expand Up @@ -183,13 +254,15 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
else:
if name == "style:style":
if "style:parent-style-name" in attrs.getNames():
if attrs.getValue("style:parent-style-name") == "_40_Example":
self.collect_style(attrs)
if attrs.getValue("style:parent-style-name") == "_40_Example":
self.collect_example_style(attrs)
self.maybe_collect_mono_paragraph_style(name, attrs)
else:
if name == "text:p":
self.in_p = True
self.p_recursion += 1
self.update_example_stack(attrs)
self.check_mono_paragraph(attrs)
elif name == "text:a":
# We are inside an anchor, and we should not insert another text:a tag here
self.in_a = True
Expand Down
Loading