diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 444cd6e..a17ffd8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ History 0.18.0 (2024-02-09) ------------------- -- urllib.quote_plus the url attributes +- html.escape all the url attributes - Match link domain more precisely. - Image height or width can be individually specified diff --git a/tests/data/html/audio-no_caption.html b/tests/data/html/audio-no_caption.html index 2089158..7bd5782 100644 --- a/tests/data/html/audio-no_caption.html +++ b/tests/data/html/audio-no_caption.html @@ -1,4 +1,4 @@ -
+
diff --git a/tests/data/html/audio.html b/tests/data/html/audio.html index 10bfd1c..a344dca 100644 --- a/tests/data/html/audio.html +++ b/tests/data/html/audio.html @@ -1,4 +1,4 @@ -
+
diff --git a/tests/data/html/code_block.html b/tests/data/html/code_block.html index 5aea722..b5aaefd 100644 --- a/tests/data/html/code_block.html +++ b/tests/data/html/code_block.html @@ -40,4 +40,4 @@ num1 = 54 num2 = 24 -print("The L.C.M. is", compute_lcm(num1, num2))

Readability counts.

Zen of Python By Tom Peters

\ No newline at end of file +print("The L.C.M. is", compute_lcm(num1, num2))

Readability counts.

Zen of Python By Tom Peters

\ No newline at end of file diff --git a/tests/data/html/data_attributes.html b/tests/data/html/data_attributes.html index 3f8a132..ed202d1 100644 --- a/tests/data/html/data_attributes.html +++ b/tests/data/html/data_attributes.html @@ -1 +1 @@ -
Sleepy Kitten
Cute Kitty
Happy Kitten
New Kitty
\ No newline at end of file +
Sleepy Kitten
Cute Kitty
Happy Kitten
New Kitty
\ No newline at end of file diff --git a/tests/data/html/document-pdf.html b/tests/data/html/document-pdf.html index 3f5b6fa..566846d 100644 --- a/tests/data/html/document-pdf.html +++ b/tests/data/html/document-pdf.html @@ -13,7 +13,7 @@
This is pdf caption.

Document Test

\ No newline at end of file diff --git a/tests/data/html/document-sketch.html b/tests/data/html/document-sketch.html index a0a9913..085580c 100644 --- a/tests/data/html/document-sketch.html +++ b/tests/data/html/document-sketch.html @@ -13,7 +13,7 @@
Above URL may result into 404 :)

Document Test

\ No newline at end of file diff --git a/tests/data/html/featuredimage-height_width.html b/tests/data/html/featuredimage-height_width.html index dc801b6..d14ea8d 100644 --- a/tests/data/html/featuredimage-height_width.html +++ b/tests/data/html/featuredimage-height_width.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/data/html/featuredimage-mime_type.html b/tests/data/html/featuredimage-mime_type.html index 7729028..397aed0 100644 --- a/tests/data/html/featuredimage-mime_type.html +++ b/tests/data/html/featuredimage-mime_type.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/data/html/featuredimage-missing_caption.html b/tests/data/html/featuredimage-missing_caption.html index c9b5d16..01c67be 100644 --- a/tests/data/html/featuredimage-missing_caption.html +++ b/tests/data/html/featuredimage-missing_caption.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/data/html/featuredimage-no_caption.html b/tests/data/html/featuredimage-no_caption.html index c9b5d16..01c67be 100644 --- a/tests/data/html/featuredimage-no_caption.html +++ b/tests/data/html/featuredimage-no_caption.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/data/html/featuredimage.html b/tests/data/html/featuredimage.html index eb743a3..35f7501 100644 --- a/tests/data/html/featuredimage.html +++ b/tests/data/html/featuredimage.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/data/html/image-height_width.html b/tests/data/html/image-height_width.html index a458c92..8c7d3a1 100644 --- a/tests/data/html/image-height_width.html +++ b/tests/data/html/image-height_width.html @@ -1 +1 @@ -
Sleepy Kitten
Cute Kitty
\ No newline at end of file +
Sleepy Kitten
Cute Kitty
\ No newline at end of file diff --git a/tests/data/html/image-mime_type.html b/tests/data/html/image-mime_type.html index 5c22f9a..1d70420 100644 --- a/tests/data/html/image-mime_type.html +++ b/tests/data/html/image-mime_type.html @@ -1 +1 @@ -
python
\ No newline at end of file +
python
\ No newline at end of file diff --git a/tests/data/html/image-missing_caption.html b/tests/data/html/image-missing_caption.html index 21d756b..07a2506 100644 --- a/tests/data/html/image-missing_caption.html +++ b/tests/data/html/image-missing_caption.html @@ -1 +1 @@ -
Sleepy Kitten
\ No newline at end of file +
Sleepy Kitten
\ No newline at end of file diff --git a/tests/data/html/image-no_caption.html b/tests/data/html/image-no_caption.html index 21d756b..07a2506 100644 --- a/tests/data/html/image-no_caption.html +++ b/tests/data/html/image-no_caption.html @@ -1 +1 @@ -
Sleepy Kitten
\ No newline at end of file +
Sleepy Kitten
\ No newline at end of file diff --git a/tests/data/html/image.html b/tests/data/html/image.html index 36d73b5..f0adfa1 100644 --- a/tests/data/html/image.html +++ b/tests/data/html/image.html @@ -1 +1 @@ -
Sleepy Kitten
<script>alert(23)</script>Cute Kitty
\ No newline at end of file +
Sleepy Kitten
<script>alert(23)</script>Cute Kitty
\ No newline at end of file diff --git a/tests/data/html/mark_tags.html b/tests/data/html/mark_tags.html index fae3437..bce70c6 100644 --- a/tests/data/html/mark_tags.html +++ b/tests/data/html/mark_tags.html @@ -1 +1 @@ -

This is bold text, this is italic text, this is <script>alert('hello');</script> italic and bold text and this has a link

\ No newline at end of file +

This is bold text, this is italic text, this is <script>alert('hello');</script> italic and bold text and this has a link

\ No newline at end of file diff --git a/tests/data/json/image.json b/tests/data/json/image.json index 55d981d..5720172 100644 --- a/tests/data/json/image.json +++ b/tests/data/json/image.json @@ -4,10 +4,13 @@ { "type": "image", "attrs": { - "src": { "image": "https://placekitten.com/200/301", "fallback": "https://placekitten.com/198/654" }, + "src": { + "image": "https://placekitten.com/200/301?text=\"escape me\"", + "fallback": "https://placekitten.com/198/654" + }, "alt": "Sleepy Kitten", "caption": "Cute Kitty" } } - ] + ] } diff --git a/tests/test_transform.py b/tests/test_transform.py index d3b3de7..c5a67c8 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -66,6 +66,14 @@ def build_test_data(): with open(file_path) as f: data = f.read() store[data_type][file.split(f".{data_type}")[0]] = data + + ## Use this to (re)generate the html files + # if data_type == "json": + # renderer = tiptapy.BaseDoc(config) + # rendered = renderer.render(data) + # with open(file_path.replace("json", "html"), "w") as f: + # f.write(rendered) + return store["json"], store["html"] diff --git a/tiptapy/__init__.py b/tiptapy/__init__.py index 3f3a575..4cad490 100644 --- a/tiptapy/__init__.py +++ b/tiptapy/__init__.py @@ -12,7 +12,6 @@ get_audio_player_block, get_doc_block, make_img_src, - quote_plus, ) __version__ = "0.18.0" @@ -31,9 +30,10 @@ def init_env(path, config): env.globals["handle_links"] = build_link_handler(config) # Cause jinja2 `e` filter is not exactly same as html.escape env.globals["escape"] = escape + env.filters["escape"] = escape + env.filters["str"] = str env.globals["get_audio_player_block"] = get_audio_player_block env.globals["get_doc_block"] = get_doc_block - env.filters["quote_plus"] = quote_plus return env diff --git a/tiptapy/image.py b/tiptapy/image.py index 324ba3f..09b2cb0 100644 --- a/tiptapy/image.py +++ b/tiptapy/image.py @@ -1,26 +1,27 @@ # Image file type and it's MIME type mappings that are suported by tiptapy. # Detailed documentation can be found about Image file type and format guide. # Link: https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Image_types +from html import escape from os.path import splitext -class SupportedFormatsMapper(dict): +class SupportedFormatsMapper(dict): def __missing__(self, ext): - return 'image' + return "image" SUPPORTED_FORMATS_MAP = SupportedFormatsMapper( - PNG='image/png', - JPG='image/jpeg', - JPEG='image/jpeg', - GIF='image/gif', - BMP='image/bmp', - WEBP='image/webp', - SVG='image/svg+xml' + PNG="image/png", + JPG="image/jpeg", + JPEG="image/jpeg", + GIF="image/gif", + BMP="image/bmp", + WEBP="image/webp", + SVG="image/svg+xml", ) def url2mime(url): ext = splitext(url)[-1] - ext = (ext[1:] if ext.startswith('.') else ext).upper() - return SUPPORTED_FORMATS_MAP[ext] + ext = (ext[1:] if ext.startswith(".") else ext).upper() + return escape(SUPPORTED_FORMATS_MAP[ext]) diff --git a/tiptapy/macros.py b/tiptapy/macros.py index ded7a05..96e44f4 100644 --- a/tiptapy/macros.py +++ b/tiptapy/macros.py @@ -1,36 +1,35 @@ import pkgutil from html import escape from string import Template -from urllib.parse import quote_plus, urlparse +from urllib.parse import urlparse def make_img_src(attrs): - alt = attrs.get("alt", "").strip() - height = attrs.get("height", "") - width = attrs.get("width", "") - fallback_url = quote_plus(attrs["src"]["fallback"]).strip() - image_src = f'img src="{fallback_url}"' + alt = escape(attrs.get("alt", "").strip()) + height = escape(str(attrs.get("height", ""))) + width = escape(str(attrs.get("width", ""))) + fallback_url = escape(attrs["src"]["fallback"].strip()) + img = f'img src="{fallback_url}"' if alt: - image_src += f' alt="{escape(alt)}"' + img += f' alt="{alt}"' if width: - image_src += f' width="{width}"' + img += f' width="{width}"' if height: - image_src += f' height="{height}"' + img += f' height="{height}"' - return image_src + return img def build_link_handler(config): def handle_links(attrs): retval = None if attrs: - url = quote_plus(attrs.pop("href", "")).strip() + url = attrs.get("href", "").strip() link = urlparse(url) if not ( link.netloc == config.DOMAIN or link.netloc.endswith(f".{config.DOMAIN}") ): - attrs["href"] = url attrs["target"] = "_blank" attrs["rel"] = "noopener nofollow" retval = " ".join( diff --git a/tiptapy/templates/extras/audio.html b/tiptapy/templates/extras/audio.html index 1d6d56f..65674f6 100644 --- a/tiptapy/templates/extras/audio.html +++ b/tiptapy/templates/extras/audio.html @@ -1,12 +1,13 @@ {%- if node.attrs.src -%} - {%- set caption = node.attrs.caption|trim -%} + {%- set caption = node.attrs.caption|trim|escape -%} {%- set audio_player_block = get_audio_player_block() -%} + {%- set src = node.attrs.src|trim|escape -%}
{%- if caption -%} -
{{audio_player_block}}
{{escape(caption)}}
+
{{audio_player_block}}
{{caption}}
{%- else -%} -
{{audio_player_block}}
+
{{audio_player_block}}
{%- endif -%}
{%- endif -%} diff --git a/tiptapy/templates/extras/document.html b/tiptapy/templates/extras/document.html index 914f8d6..160cecb 100644 --- a/tiptapy/templates/extras/document.html +++ b/tiptapy/templates/extras/document.html @@ -1,13 +1,13 @@ {%- set caption = node.attrs.caption|trim -%} -{%- set src = node.attrs.src|trim|quote_plus -%} +{%- set src = node.attrs.src|trim -%} {%- set size = node.attrs.size|trim -%} -{%- set fname = node.attrs.name|trim|quote_plus -%} -{%- set ext = node.attrs.format|trim -%} +{%- set fname = node.attrs.name|trim -%} +{%- set ext = node.attrs.format -%} {%- if src and size and fname and ext -%} {%- set doc_block = get_doc_block(ext, fname, size, src) -%}
{%- if caption -%} - {{doc_block}}
{{escape(caption)}}
+ {{doc_block}}
{{caption|escape}}
{%- else -%} {{doc_block}} {%- endif -%} diff --git a/tiptapy/templates/extras/featuredimage.html b/tiptapy/templates/extras/featuredimage.html index e153015..590c1d8 100644 --- a/tiptapy/templates/extras/featuredimage.html +++ b/tiptapy/templates/extras/featuredimage.html @@ -1,18 +1,18 @@ {%- if node.attrs.src -%} - {%- set image_url = node.attrs.src.image|trim|quote_plus -%} + {%- set image_url = node.attrs.src.image|trim|escape -%} {%- set image_type = url2mime(image_url) -%} - {%- set fallback_url = node.attrs.src.fallback|trim|quote_plus -%} + {%- set fallback_url = node.attrs.src.fallback|trim|escape -%} {%- set fallback_type = url2mime(fallback_url) -%} - {%- set caption = node.attrs.caption|trim -%} - {%- set alt = node.attrs.alt|trim -%} - {%- set height = node.attrs.height -%} - {%- set width = node.attrs.width -%} + {%- set caption = node.attrs.caption|trim|escape -%} + {%- set alt = node.attrs.alt|trim|escape -%} + {%- set height = node.attrs.height|str|escape -%} + {%- set width = node.attrs.width|str|escape -%} {%- if image_url or fallback_url -%} {%- endif -%} diff --git a/tiptapy/templates/image.html b/tiptapy/templates/image.html index 3108df7..ebd4f4f 100644 --- a/tiptapy/templates/image.html +++ b/tiptapy/templates/image.html @@ -1,17 +1,17 @@ {%- if node.attrs.src -%} - {%- set caption = node.attrs.caption|trim -%} + {%- set caption = node.attrs.caption|trim|escape -%} {%- set alt = node.attrs.alt|trim -%} {%- set height = node.attrs.height -%} {%- set width = node.attrs.width -%} - {%- set image_url = node.attrs.src.image|trim|quote_plus -%} + {%- set image_url = node.attrs.src.image|trim|escape -%} {%- set image_type = url2mime(image_url) -%} - {%- set fallback_url = node.attrs.src.fallback|trim|quote_plus -%} + {%- set fallback_url = node.attrs.src.fallback|trim|escape -%} {%- set fallback_type = url2mime(fallback_url) -%} {%- if image_url or fallback_url -%}
<{{ make_img_src(node.attrs) }}/> {%- if caption -%} -
{{ escape(caption) }}
+
{{ caption }}
{%- endif -%}
{%- endif -%}